In [1]:
import ray.tune as tune
from ray.rllib.agents.ppo import PPOTrainer
from ray.tune import register_env
from envs.env_creator import env_creator, ibgym_env_creator_rllib
from envs.IBGym_mod_envs import IBGymModded
from ppo.policy import LSTMPPOPolicy

## Configure Training

In [2]:


register_env("IBGym-v1", ibgym_env_creator_rllib)

config = {
    # Environment (RLlib understands openAI gym registered strings).
    "env": "IBGym-v1",
    # Use 2 environment workers (aka "rollout workers") that parallelly
    # collect samples from their own environment clone(s).
    "num_workers": 12,
    "num_gpus": 1,

    # Change this to "framework: torch", if you are using PyTorch.
    # Also, use "framework: tf2" for tf2.x eager execution.
    "framework": "tf",
    "entropy_coeff": 0.0001,
    # "entropy_coeff_schedule":PiecewiseSchedule(endpoints=[(0, 0.01), (143000, 0.00001)]),
    "lr": 3e-5,
    "gamma": 0.994,
    "clip_param": 0.2,
    "seed": 5321,
    "num_sgd_iter": 2,
    "sgd_minibatch_size": 1000,

    # "vf_loss_coeff": 1e-9,
    # "vf_clip_param": 1e7,
    # Tweak the default model provided automatically by RLlib,
    # given the environment's observation- and action spaces.
    "model": {
        # == LSTM ==
        # Whether to wrap the model with an LSTM.
        "use_lstm": False,
        # Max seq len for training the LSTM, defaults to 20.
        #"max_seq_len": 30,
        # Size of the LSTM cell.
        #"lstm_cell_size": 64,
        # "use_attention": True,
        # "attention_num_transformer_units": 2,
        # "attention_dim": 128,
        # "vf_share_layers": True,
        # "fcnet_hiddens": [32, 32, 32],
        # "sgd_minibatch_size": 1024,
        "vf_share_layers": False,
        # Whether to feed a_{t-1} to LSTM (one-hot encoded if discrete).
        "lstm_use_prev_action": False,
        # Whether to feed r_{t-1} to LSTM.
        "lstm_use_prev_reward": False,
        # Whether the LSTM is time-major (TxBx..) or batch-major (BxTx..).
        "_time_major": False,
    },
    "train_batch_size": 32000,
    "timesteps_per_iteration": 32000,
    # "output": "tmp/ib-out",
    # Set up a separate evaluation worker set for the
    # `trainer.evaluate()` call after training (see below).
    "evaluation_num_workers": 3,
    # Only for evaluation runs, render the env.
    "evaluation_config": {
        "render_env": False,
    },
}

In [None]:
results = tune.run(
        PPOTrainer,
        config=config,
        name="industrial_benchmark",
        local_dir="tmp/ray_exp_logs",
        checkpoint_freq=5,
        # stop={"training_iteration": 5},
        sync_config=tune.SyncConfig(
            syncer=None  # Disable syncing
        )
    )

2022-10-15 17:45:23,870	INFO services.py:1470 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
[2m[36m(PPOTrainer pid=18350)[0m 2022-10-15 17:45:29,741	INFO trainer.py:2332 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
[2m[36m(PPOTrainer pid=18350)[0m 2022-10-15 17:45:29,745	INFO ppo.py:414 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=18350)[0m 2022-10-15 17:45:29,745	INFO trainer.py:903 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=18409)[0m   logger.warn(
[2m[36m(RolloutWorker pid=18396)[0m   logger.warn(
[2m[36m(R

Trial name,status,loc
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350


Trial name,status,loc
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350


Trial name,status,loc
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 63984
  counters:
    num_agent_steps_sampled: 63984
    num_agent_steps_trained: 63984
    num_env_steps_sampled: 63984
    num_env_steps_trained: 63984
  custom_metrics: {}
  date: 2022-10-15_17-45-54
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -242643.40926283607
  episode_reward_mean: -352599.23873893346
  episode_reward_min: -694923.1384058335
  episodes_this_iter: 60
  episodes_total: 60
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2951600551605225
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006728667649440467
          model: {}
          policy_loss: -0.012838481925427914
          total_loss: 9.986967086791992
          vf_explained_var: -5.67

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,1,12.6256,63984,-352599,-242643,-694923,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,1,12.6256,63984,-352599,-242643,-694923,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 127968
  counters:
    num_agent_steps_sampled: 127968
    num_agent_steps_trained: 127968
    num_env_steps_sampled: 127968
    num_env_steps_trained: 127968
  custom_metrics: {}
  date: 2022-10-15_17-46-07
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -238017.92135176496
  episode_reward_mean: -337174.52353516035
  episode_reward_min: -694923.1384058335
  episodes_this_iter: 60
  episodes_total: 120
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2939796447753906
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00035324832424521446
          model: {}
          policy_loss: 0.010788892395794392
          total_loss: 10.010529518127441
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,2,25.6294,127968,-337175,-238018,-694923,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,2,25.6294,127968,-337175,-238018,-694923,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 191952
  counters:
    num_agent_steps_sampled: 191952
    num_agent_steps_trained: 191952
    num_env_steps_sampled: 191952
    num_env_steps_trained: 191952
  custom_metrics: {}
  date: 2022-10-15_17-46-20
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -225340.0318600059
  episode_reward_mean: -304559.09038833174
  episode_reward_min: -533422.1447766542
  episodes_this_iter: 60
  episodes_total: 180
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2924649715423584
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003521831822581589
          model: {}
          policy_loss: 0.0038035293109714985
          total_loss: 10.003544807434082
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,3,38.2653,191952,-304559,-225340,-533422,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,3,38.2653,191952,-304559,-225340,-533422,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 255936
  counters:
    num_agent_steps_sampled: 255936
    num_agent_steps_trained: 255936
    num_env_steps_sampled: 255936
    num_env_steps_trained: 255936
  custom_metrics: {}
  date: 2022-10-15_17-46-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -224770.42964126475
  episode_reward_mean: -281661.99980539805
  episode_reward_min: -533422.1447766542
  episodes_this_iter: 72
  episodes_total: 252
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2896173000335693
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00042265572119504213
          model: {}
          policy_loss: 0.006621160544455051
          total_loss: 10.006376266479492
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,4,49.4863,255936,-281662,-224770,-533422,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,4,49.4863,255936,-281662,-224770,-533422,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 319920
  counters:
    num_agent_steps_sampled: 319920
    num_agent_steps_trained: 319920
    num_env_steps_sampled: 319920
    num_env_steps_trained: 319920
  custom_metrics: {}
  date: 2022-10-15_17-46-42
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -223052.06472131092
  episode_reward_mean: -271877.82551125047
  episode_reward_min: -373663.0336891504
  episodes_this_iter: 60
  episodes_total: 312
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2863757610321045
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005363111267797649
          model: {}
          policy_loss: 0.0034235448110848665
          total_loss: 10.003201484680176
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,5,59.7912,319920,-271878,-223052,-373663,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,5,59.7912,319920,-271878,-223052,-373663,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 383904
  counters:
    num_agent_steps_sampled: 383904
    num_agent_steps_trained: 383904
    num_env_steps_sampled: 383904
    num_env_steps_trained: 383904
  custom_metrics: {}
  date: 2022-10-15_17-46-52
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -212165.89986829588
  episode_reward_mean: -262872.05475594173
  episode_reward_min: -361979.8864910163
  episodes_this_iter: 60
  episodes_total: 372
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.282223701477051
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005764019442722201
          model: {}
          policy_loss: -0.013572406955063343
          total_loss: 9.986214637756348
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,6,70.0504,383904,-262872,-212166,-361980,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 447888
  counters:
    num_agent_steps_sampled: 447888
    num_agent_steps_trained: 447888
    num_env_steps_sampled: 447888
    num_env_steps_trained: 447888
  custom_metrics: {}
  date: 2022-10-15_17-47-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -213272.52262748167
  episode_reward_mean: -253590.271526377
  episode_reward_min: -331986.68769621063
  episodes_this_iter: 72
  episodes_total: 444
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.278351068496704
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003412879887036979
          model: {}
          policy_loss: 0.004581911489367485
          total_loss: 10.004321098327637
          vf_explained_var: -

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,7,80.6548,447888,-253590,-213273,-331987,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,7,80.6548,447888,-253590,-213273,-331987,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,7,80.6548,447888,-253590,-213273,-331987,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 511872
  counters:
    num_agent_steps_sampled: 511872
    num_agent_steps_trained: 511872
    num_env_steps_sampled: 511872
    num_env_steps_trained: 511872
  custom_metrics: {}
  date: 2022-10-15_17-47-14
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -219349.98053034037
  episode_reward_mean: -247090.89971458598
  episode_reward_min: -313970.2608194886
  episodes_this_iter: 60
  episodes_total: 504
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.275519371032715
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003656224289443344
          model: {}
          policy_loss: 0.008940943516790867
          total_loss: 10.008686065673828
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,8,91.917,511872,-247091,-219350,-313970,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 575856
  counters:
    num_agent_steps_sampled: 575856
    num_agent_steps_trained: 575856
    num_env_steps_sampled: 575856
    num_env_steps_trained: 575856
  custom_metrics: {}
  date: 2022-10-15_17-47-24
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -215221.96417750447
  episode_reward_mean: -241999.7593672701
  episode_reward_min: -311939.56248644536
  episodes_this_iter: 60
  episodes_total: 564
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2712156772613525
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000536667532287538
          model: {}
          policy_loss: -0.005494036711752415
          total_loss: 9.99428653717041
          vf_explained_var: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,9,101.573,575856,-242000,-215222,-311940,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 639840
  counters:
    num_agent_steps_sampled: 639840
    num_agent_steps_trained: 639840
    num_env_steps_sampled: 639840
    num_env_steps_trained: 639840
  custom_metrics: {}
  date: 2022-10-15_17-47-33
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -218721.96212867805
  episode_reward_mean: -239425.50778217937
  episode_reward_min: -313589.48054541077
  episodes_this_iter: 72
  episodes_total: 636
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.266475200653076
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003333302156534046
          model: {}
          policy_loss: 0.0069070556201040745
          total_loss: 10.006647109985352
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,10,111.177,639840,-239426,-218722,-313589,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 703824
  counters:
    num_agent_steps_sampled: 703824
    num_agent_steps_trained: 703824
    num_env_steps_sampled: 703824
    num_env_steps_trained: 703824
  custom_metrics: {}
  date: 2022-10-15_17-47-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -219004.82938186158
  episode_reward_mean: -237834.04827068042
  episode_reward_min: -313589.48054541077
  episodes_this_iter: 60
  episodes_total: 696
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.264876127243042
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004856934829149395
          model: {}
          policy_loss: 0.0038115952629595995
          total_loss: 10.003581047058105
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,11,121.189,703824,-237834,-219005,-313589,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 767808
  counters:
    num_agent_steps_sampled: 767808
    num_agent_steps_trained: 767808
    num_env_steps_sampled: 767808
    num_env_steps_trained: 767808
  custom_metrics: {}
  date: 2022-10-15_17-47-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -215000.75105470297
  episode_reward_mean: -234696.59014041373
  episode_reward_min: -284988.3347207834
  episodes_this_iter: 60
  episodes_total: 756
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2605090141296387
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005598492571152747
          model: {}
          policy_loss: -0.00287074176594615
          total_loss: 9.996916770935059
          vf_explained_var: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,12,130.799,767808,-234697,-215001,-284988,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 831792
  counters:
    num_agent_steps_sampled: 831792
    num_agent_steps_trained: 831792
    num_env_steps_sampled: 831792
    num_env_steps_trained: 831792
  custom_metrics: {}
  date: 2022-10-15_17-48-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -214104.79944185197
  episode_reward_mean: -231428.26363351566
  episode_reward_min: -284159.22981024487
  episodes_this_iter: 72
  episodes_total: 828
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.256251096725464
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006242931704036891
          model: {}
          policy_loss: 0.004615913610905409
          total_loss: 10.004413604736328
          vf_explained_var:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,13,140.348,831792,-231428,-214105,-284159,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 895776
  counters:
    num_agent_steps_sampled: 895776
    num_agent_steps_trained: 895776
    num_env_steps_sampled: 895776
    num_env_steps_trained: 895776
  custom_metrics: {}
  date: 2022-10-15_17-48-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -217431.50591066564
  episode_reward_mean: -229516.38356862048
  episode_reward_min: -275030.25800669636
  episodes_this_iter: 60
  episodes_total: 888
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.253739833831787
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00036737421760335565
          model: {}
          policy_loss: 0.0072363680228590965
          total_loss: 10.006982803344727
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,14,149.931,895776,-229516,-217432,-275030,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 959760
  counters:
    num_agent_steps_sampled: 959760
    num_agent_steps_trained: 959760
    num_env_steps_sampled: 959760
    num_env_steps_trained: 959760
  custom_metrics: {}
  date: 2022-10-15_17-48-22
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -215025.52542607233
  episode_reward_mean: -230555.8826981993
  episode_reward_min: -275455.6014913975
  episodes_this_iter: 60
  episodes_total: 948
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.253085136413574
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005205830675549805
          model: {}
          policy_loss: -0.0036726086400449276
          total_loss: 9.99610710144043
          vf_explained_var: -

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,15,159.53,959760,-230556,-215026,-275456,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1023744
  counters:
    num_agent_steps_sampled: 1023744
    num_agent_steps_trained: 1023744
    num_env_steps_sampled: 1023744
    num_env_steps_trained: 1023744
  custom_metrics: {}
  date: 2022-10-15_17-48-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -215025.52542607233
  episode_reward_mean: -229189.6191036378
  episode_reward_min: -272069.9775234155
  episodes_this_iter: 72
  episodes_total: 1020
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.250271797180176
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00046909565571695566
          model: {}
          policy_loss: 0.006212172098457813
          total_loss: 10.0059814453125
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,16,169.143,1023744,-229190,-215026,-272070,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1087728
  counters:
    num_agent_steps_sampled: 1087728
    num_agent_steps_trained: 1087728
    num_env_steps_sampled: 1087728
    num_env_steps_trained: 1087728
  custom_metrics: {}
  date: 2022-10-15_17-48-41
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -217004.65725689786
  episode_reward_mean: -227495.07538480364
  episode_reward_min: -288548.9399285115
  episodes_this_iter: 60
  episodes_total: 1080
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.246861457824707
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003689402947202325
          model: {}
          policy_loss: 0.006923971232026815
          total_loss: 10.006673812866211
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,17,178.314,1087728,-227495,-217005,-288549,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,17,178.314,1087728,-227495,-217005,-288549,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1151712
  counters:
    num_agent_steps_sampled: 1151712
    num_agent_steps_trained: 1151712
    num_env_steps_sampled: 1151712
    num_env_steps_trained: 1151712
  custom_metrics: {}
  date: 2022-10-15_17-48-51
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -215330.53208459052
  episode_reward_mean: -227654.6524875302
  episode_reward_min: -288548.9399285115
  episodes_this_iter: 60
  episodes_total: 1140
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2423136234283447
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004922968801110983
          model: {}
          policy_loss: -0.005687123164534569
          total_loss: 9.994086265563965
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,18,188.748,1151712,-227655,-215331,-288549,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1215696
  counters:
    num_agent_steps_sampled: 1215696
    num_agent_steps_trained: 1215696
    num_env_steps_sampled: 1215696
    num_env_steps_trained: 1215696
  custom_metrics: {}
  date: 2022-10-15_17-49-00
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -212798.09276124195
  episode_reward_mean: -226976.1396636184
  episode_reward_min: -273072.189932048
  episodes_this_iter: 72
  episodes_total: 1212
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.246368169784546
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003787105088122189
          model: {}
          policy_loss: 0.006922560278326273
          total_loss: 10.006672859191895
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,19,198.078,1215696,-226976,-212798,-273072,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1279680
  counters:
    num_agent_steps_sampled: 1279680
    num_agent_steps_trained: 1279680
    num_env_steps_sampled: 1279680
    num_env_steps_trained: 1279680
  custom_metrics: {}
  date: 2022-10-15_17-49-10
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -210478.6686550575
  episode_reward_mean: -225319.73001156942
  episode_reward_min: -270331.14485579234
  episodes_this_iter: 60
  episodes_total: 1272
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.243297576904297
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004887444665655494
          model: {}
          policy_loss: 0.008289041928946972
          total_loss: 10.008062362670898
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,20,207.985,1279680,-225320,-210479,-270331,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,20,207.985,1279680,-225320,-210479,-270331,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1343664
  counters:
    num_agent_steps_sampled: 1343664
    num_agent_steps_trained: 1343664
    num_env_steps_sampled: 1343664
    num_env_steps_trained: 1343664
  custom_metrics: {}
  date: 2022-10-15_17-49-21
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -210478.6686550575
  episode_reward_mean: -225786.88020844362
  episode_reward_min: -277445.7807820719
  episodes_this_iter: 60
  episodes_total: 1332
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.239436626434326
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000743812182918191
          model: {}
          policy_loss: -0.0024379901587963104
          total_loss: 9.997386932373047
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,21,218.35,1343664,-225787,-210479,-277446,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1407648
  counters:
    num_agent_steps_sampled: 1407648
    num_agent_steps_trained: 1407648
    num_env_steps_sampled: 1407648
    num_env_steps_trained: 1407648
  custom_metrics: {}
  date: 2022-10-15_17-49-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -213593.4353700192
  episode_reward_mean: -225550.09109216742
  episode_reward_min: -263100.88463999605
  episodes_this_iter: 72
  episodes_total: 1404
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.239346742630005
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000701787939760834
          model: {}
          policy_loss: 0.0047050584107637405
          total_loss: 10.004522323608398
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,22,228.182,1407648,-225550,-213593,-263101,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1471632
  counters:
    num_agent_steps_sampled: 1471632
    num_agent_steps_trained: 1471632
    num_env_steps_sampled: 1471632
    num_env_steps_trained: 1471632
  custom_metrics: {}
  date: 2022-10-15_17-49-40
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -210354.47477439442
  episode_reward_mean: -224948.31110643188
  episode_reward_min: -261451.30853591944
  episodes_this_iter: 60
  episodes_total: 1464
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.239452838897705
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000430287909694016
          model: {}
          policy_loss: 0.007540999911725521
          total_loss: 10.007303237915039
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,23,237.807,1471632,-224948,-210354,-261451,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1535616
  counters:
    num_agent_steps_sampled: 1535616
    num_agent_steps_trained: 1535616
    num_env_steps_sampled: 1535616
    num_env_steps_trained: 1535616
  custom_metrics: {}
  date: 2022-10-15_17-49-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -210354.47477439442
  episode_reward_mean: -224519.8953894821
  episode_reward_min: -252272.12381379597
  episodes_this_iter: 60
  episodes_total: 1524
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2424488067626953
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00039378658402711153
          model: {}
          policy_loss: -0.004268104210495949
          total_loss: 9.995485305786133
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,24,247.348,1535616,-224520,-210354,-252272,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1599600
  counters:
    num_agent_steps_sampled: 1599600
    num_agent_steps_trained: 1599600
    num_env_steps_sampled: 1599600
    num_env_steps_trained: 1599600
  custom_metrics: {}
  date: 2022-10-15_17-49-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -209076.2528495645
  episode_reward_mean: -225332.91215697798
  episode_reward_min: -260155.72701965988
  episodes_this_iter: 72
  episodes_total: 1596
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2365453243255615
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004526094126049429
          model: {}
          policy_loss: 0.005908408667892218
          total_loss: 10.005674362182617
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,25,256.916,1599600,-225333,-209076,-260156,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1663584
  counters:
    num_agent_steps_sampled: 1663584
    num_agent_steps_trained: 1663584
    num_env_steps_sampled: 1663584
    num_env_steps_trained: 1663584
  custom_metrics: {}
  date: 2022-10-15_17-50-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -214630.2204302724
  episode_reward_mean: -224623.45794437767
  episode_reward_min: -291711.94014148135
  episodes_this_iter: 60
  episodes_total: 1656
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.231076955795288
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00028687724261544645
          model: {}
          policy_loss: 0.008890391327440739
          total_loss: 10.008624076843262
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,26,266.605,1663584,-224623,-214630,-291712,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1727568
  counters:
    num_agent_steps_sampled: 1727568
    num_agent_steps_trained: 1727568
    num_env_steps_sampled: 1727568
    num_env_steps_trained: 1727568
  custom_metrics: {}
  date: 2022-10-15_17-50-19
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -212375.2727784309
  episode_reward_mean: -223495.4420534401
  episode_reward_min: -291711.94014148135
  episodes_this_iter: 60
  episodes_total: 1716
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.231023073196411
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000344849715474993
          model: {}
          policy_loss: 0.0007876484305597842
          total_loss: 10.000533103942871
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,27,276.637,1727568,-223495,-212375,-291712,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1791552
  counters:
    num_agent_steps_sampled: 1791552
    num_agent_steps_trained: 1791552
    num_env_steps_sampled: 1791552
    num_env_steps_trained: 1791552
  custom_metrics: {}
  date: 2022-10-15_17-50-29
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -211069.5063980216
  episode_reward_mean: -221492.41562703715
  episode_reward_min: -282321.7127316264
  episodes_this_iter: 72
  episodes_total: 1788
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.227975606918335
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004806556098628789
          model: {}
          policy_loss: 0.0027929595671594143
          total_loss: 10.002567291259766
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,28,286.386,1791552,-221492,-211070,-282322,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1855536
  counters:
    num_agent_steps_sampled: 1855536
    num_agent_steps_trained: 1855536
    num_env_steps_sampled: 1855536
    num_env_steps_trained: 1855536
  custom_metrics: {}
  date: 2022-10-15_17-50-39
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -209926.74031046574
  episode_reward_mean: -220537.19347431188
  episode_reward_min: -274396.7564709245
  episodes_this_iter: 60
  episodes_total: 1848
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2262609004974365
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000552977726329118
          model: {}
          policy_loss: 0.0074158646166324615
          total_loss: 10.007203102111816
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,29,296.046,1855536,-220537,-209927,-274397,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1919520
  counters:
    num_agent_steps_sampled: 1919520
    num_agent_steps_trained: 1919520
    num_env_steps_sampled: 1919520
    num_env_steps_trained: 1919520
  custom_metrics: {}
  date: 2022-10-15_17-50-49
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -209926.74031046574
  episode_reward_mean: -221436.88570480817
  episode_reward_min: -286348.52662425936
  episodes_this_iter: 60
  episodes_total: 1908
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2223832607269287
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00028886494692415
          model: {}
          policy_loss: -0.0009171814890578389
          total_loss: 9.998818397521973
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,30,306.034,1919520,-221437,-209927,-286349,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,30,306.034,1919520,-221437,-209927,-286349,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,30,306.034,1919520,-221437,-209927,-286349,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 1983504
  counters:
    num_agent_steps_sampled: 1983504
    num_agent_steps_trained: 1983504
    num_env_steps_sampled: 1983504
    num_env_steps_trained: 1983504
  custom_metrics: {}
  date: 2022-10-15_17-50-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -210458.95154900773
  episode_reward_mean: -220905.32970322523
  episode_reward_min: -286348.52662425936
  episodes_this_iter: 72
  episodes_total: 1980
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2220797538757324
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000270422751782462
          model: {}
          policy_loss: 0.005213574040681124
          total_loss: 10.004944801330566
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,31,316.674,1983504,-220905,-210459,-286349,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2047488
  counters:
    num_agent_steps_sampled: 2047488
    num_agent_steps_trained: 2047488
    num_env_steps_sampled: 2047488
    num_env_steps_trained: 2047488
  custom_metrics: {}
  date: 2022-10-15_17-51-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -209338.3657564658
  episode_reward_mean: -219033.4252700441
  episode_reward_min: -258523.86899292102
  episodes_this_iter: 60
  episodes_total: 2040
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2242774963378906
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00043883765465579927
          model: {}
          policy_loss: 0.006239635404199362
          total_loss: 10.00600528717041
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,32,325.972,2047488,-219033,-209338,-258524,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2111472
  counters:
    num_agent_steps_sampled: 2111472
    num_agent_steps_trained: 2111472
    num_env_steps_sampled: 2111472
    num_env_steps_trained: 2111472
  custom_metrics: {}
  date: 2022-10-15_17-51-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -209338.3657564658
  episode_reward_mean: -219020.10826330565
  episode_reward_min: -243982.4170248548
  episodes_this_iter: 60
  episodes_total: 2100
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.226024866104126
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007154454942792654
          model: {}
          policy_loss: -0.000993553432635963
          total_loss: 9.998827934265137
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,33,335.394,2111472,-219020,-209338,-243982,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2175456
  counters:
    num_agent_steps_sampled: 2175456
    num_agent_steps_trained: 2175456
    num_env_steps_sampled: 2175456
    num_env_steps_trained: 2175456
  custom_metrics: {}
  date: 2022-10-15_17-51-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -209909.14023734463
  episode_reward_mean: -219904.12828523124
  episode_reward_min: -286978.07677529176
  episodes_this_iter: 72
  episodes_total: 2172
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2273266315460205
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004668017791118473
          model: {}
          policy_loss: 0.005257326643913984
          total_loss: 10.005027770996094
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,34,344.818,2175456,-219904,-209909,-286978,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2239440
  counters:
    num_agent_steps_sampled: 2239440
    num_agent_steps_trained: 2239440
    num_env_steps_sampled: 2239440
    num_env_steps_trained: 2239440
  custom_metrics: {}
  date: 2022-10-15_17-51-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -208737.53403211018
  episode_reward_mean: -218871.8188395859
  episode_reward_min: -271486.198224664
  episodes_this_iter: 60
  episodes_total: 2232
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.228803873062134
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003857347182929516
          model: {}
          policy_loss: 0.007035247981548309
          total_loss: 10.006790161132812
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,35,354.418,2239440,-218872,-208738,-271486,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2303424
  counters:
    num_agent_steps_sampled: 2303424
    num_agent_steps_trained: 2303424
    num_env_steps_sampled: 2303424
    num_env_steps_trained: 2303424
  custom_metrics: {}
  date: 2022-10-15_17-51-48
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -208709.29199807203
  episode_reward_mean: -217806.30967994127
  episode_reward_min: -232965.20668357995
  episodes_this_iter: 60
  episodes_total: 2292
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.229562759399414
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004046894027851522
          model: {}
          policy_loss: -0.0005494729848578572
          total_loss: 9.999207496643066
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,36,364.607,2303424,-217806,-208709,-232965,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2367408
  counters:
    num_agent_steps_sampled: 2367408
    num_agent_steps_trained: 2367408
    num_env_steps_sampled: 2367408
    num_env_steps_trained: 2367408
  custom_metrics: {}
  date: 2022-10-15_17-51-57
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -208647.7358469245
  episode_reward_mean: -217310.9309224394
  episode_reward_min: -226955.16309609453
  episodes_this_iter: 72
  episodes_total: 2364
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.225297212600708
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004026492533739656
          model: {}
          policy_loss: 0.00547567056491971
          total_loss: 10.005233764648438
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,37,373.607,2367408,-217311,-208648,-226955,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2431392
  counters:
    num_agent_steps_sampled: 2431392
    num_agent_steps_trained: 2431392
    num_env_steps_sampled: 2431392
    num_env_steps_trained: 2431392
  custom_metrics: {}
  date: 2022-10-15_17-52-06
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -207900.96752774503
  episode_reward_mean: -217367.72445037946
  episode_reward_min: -226764.03085466946
  episodes_this_iter: 60
  episodes_total: 2424
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.228321075439453
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00045503993169404566
          model: {}
          policy_loss: 0.006393748801201582
          total_loss: 10.0061616897583
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,38,382.558,2431392,-217368,-207901,-226764,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2495376
  counters:
    num_agent_steps_sampled: 2495376
    num_agent_steps_trained: 2495376
    num_env_steps_sampled: 2495376
    num_env_steps_trained: 2495376
  custom_metrics: {}
  date: 2022-10-15_17-52-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -207900.96752774503
  episode_reward_mean: -216574.8876996893
  episode_reward_min: -226968.00758351263
  episodes_this_iter: 60
  episodes_total: 2484
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.228062629699707
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006456156843341887
          model: {}
          policy_loss: -0.0009449039353057742
          total_loss: 9.998861312866211
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,39,391.625,2495376,-216575,-207901,-226968,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2559360
  counters:
    num_agent_steps_sampled: 2559360
    num_agent_steps_trained: 2559360
    num_env_steps_sampled: 2559360
    num_env_steps_trained: 2559360
  custom_metrics: {}
  date: 2022-10-15_17-52-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -207980.02095429707
  episode_reward_mean: -216184.4258534462
  episode_reward_min: -249658.92200190452
  episodes_this_iter: 72
  episodes_total: 2556
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.224029541015625
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004600784450303763
          model: {}
          policy_loss: 0.00474914675578475
          total_loss: 10.004517555236816
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,40,400.371,2559360,-216184,-207980,-249659,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2623344
  counters:
    num_agent_steps_sampled: 2623344
    num_agent_steps_trained: 2623344
    num_env_steps_sampled: 2623344
    num_env_steps_trained: 2623344
  custom_metrics: {}
  date: 2022-10-15_17-52-32
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -207816.02147320073
  episode_reward_mean: -216205.03361394544
  episode_reward_min: -249658.92200190452
  episodes_this_iter: 60
  episodes_total: 2616
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2212839126586914
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000315980170853436
          model: {}
          policy_loss: 0.007486524526029825
          total_loss: 10.007227897644043
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,41,409.352,2623344,-216205,-207816,-249659,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2687328
  counters:
    num_agent_steps_sampled: 2687328
    num_agent_steps_trained: 2687328
    num_env_steps_sampled: 2687328
    num_env_steps_trained: 2687328
  custom_metrics: {}
  date: 2022-10-15_17-52-41
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205852.8349482484
  episode_reward_mean: -214828.25769325093
  episode_reward_min: -223682.8558397279
  episodes_this_iter: 60
  episodes_total: 2676
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2156999111175537
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005130969220772386
          model: {}
          policy_loss: -0.0007969269645400345
          total_loss: 9.998983383178711
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,42,417.766,2687328,-214828,-205853,-223683,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,42,417.766,2687328,-214828,-205853,-223683,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2751312
  counters:
    num_agent_steps_sampled: 2751312
    num_agent_steps_trained: 2751312
    num_env_steps_sampled: 2751312
    num_env_steps_trained: 2751312
  custom_metrics: {}
  date: 2022-10-15_17-52-51
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205852.8349482484
  episode_reward_mean: -214388.62221192883
  episode_reward_min: -224903.46175937983
  episodes_this_iter: 72
  episodes_total: 2748
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.215597629547119
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00032368669053539634
          model: {}
          policy_loss: 0.005437218118458986
          total_loss: 10.005179405212402
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,43,428.257,2751312,-214389,-205853,-224903,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2815296
  counters:
    num_agent_steps_sampled: 2815296
    num_agent_steps_trained: 2815296
    num_env_steps_sampled: 2815296
    num_env_steps_trained: 2815296
  custom_metrics: {}
  date: 2022-10-15_17-53-01
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205781.0540200471
  episode_reward_mean: -214398.3340799832
  episode_reward_min: -252520.3002016456
  episodes_this_iter: 60
  episodes_total: 2808
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2142744064331055
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006054143887013197
          model: {}
          policy_loss: 0.00807478278875351
          total_loss: 10.00787353515625
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,44,437.539,2815296,-214398,-205781,-252520,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2879280
  counters:
    num_agent_steps_sampled: 2879280
    num_agent_steps_trained: 2879280
    num_env_steps_sampled: 2879280
    num_env_steps_trained: 2879280
  custom_metrics: {}
  date: 2022-10-15_17-53-10
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205781.0540200471
  episode_reward_mean: -214328.03415298808
  episode_reward_min: -252520.3002016456
  episodes_this_iter: 60
  episodes_total: 2868
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.210197687149048
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000858970801346004
          model: {}
          policy_loss: -0.0009276125929318368
          total_loss: 9.998923301696777
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,45,447.08,2879280,-214328,-205781,-252520,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 2943264
  counters:
    num_agent_steps_sampled: 2943264
    num_agent_steps_trained: 2943264
    num_env_steps_sampled: 2943264
    num_env_steps_trained: 2943264
  custom_metrics: {}
  date: 2022-10-15_17-53-19
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -204940.06613914983
  episode_reward_mean: -213080.25810674022
  episode_reward_min: -225117.04376228715
  episodes_this_iter: 72
  episodes_total: 2940
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2134275436401367
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0002448953746352345
          model: {}
          policy_loss: 0.005803263280540705
          total_loss: 10.005531311035156
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,46,456.069,2943264,-213080,-204940,-225117,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3007248
  counters:
    num_agent_steps_sampled: 3007248
    num_agent_steps_trained: 3007248
    num_env_steps_sampled: 3007248
    num_env_steps_trained: 3007248
  custom_metrics: {}
  date: 2022-10-15_17-53-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205798.15179681694
  episode_reward_mean: -212653.0057965227
  episode_reward_min: -222169.6376325864
  episodes_this_iter: 60
  episodes_total: 3000
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.212869882583618
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003671497688628733
          model: {}
          policy_loss: 0.00651486124843359
          total_loss: 10.006267547607422
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,47,464.981,3007248,-212653,-205798,-222170,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3071232
  counters:
    num_agent_steps_sampled: 3071232
    num_agent_steps_trained: 3071232
    num_env_steps_sampled: 3071232
    num_env_steps_trained: 3071232
  custom_metrics: {}
  date: 2022-10-15_17-53-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205251.8633657829
  episode_reward_mean: -213164.45818073905
  episode_reward_min: -253443.61497098755
  episodes_this_iter: 60
  episodes_total: 3060
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.209899663925171
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003461216692812741
          model: {}
          policy_loss: 0.0015112780965864658
          total_loss: 10.001258850097656
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,48,473.811,3071232,-213164,-205252,-253444,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3135216
  counters:
    num_agent_steps_sampled: 3135216
    num_agent_steps_trained: 3135216
    num_env_steps_sampled: 3135216
    num_env_steps_trained: 3135216
  custom_metrics: {}
  date: 2022-10-15_17-53-47
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205926.5302903076
  episode_reward_mean: -213928.3928186618
  episode_reward_min: -252777.17906708206
  episodes_this_iter: 72
  episodes_total: 3132
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.21120285987854
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003468189388513565
          model: {}
          policy_loss: 0.0036095967516303062
          total_loss: 10.003357887268066
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,49,483.434,3135216,-213928,-205927,-252777,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3199200
  counters:
    num_agent_steps_sampled: 3199200
    num_agent_steps_trained: 3199200
    num_env_steps_sampled: 3199200
    num_env_steps_trained: 3199200
  custom_metrics: {}
  date: 2022-10-15_17-53-57
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205926.5302903076
  episode_reward_mean: -213490.41672878337
  episode_reward_min: -250341.75220083626
  episodes_this_iter: 60
  episodes_total: 3192
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2071053981781006
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00043454914703033864
          model: {}
          policy_loss: 0.0060109589248895645
          total_loss: 10.005776405334473
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,50,493.116,3199200,-213490,-205927,-250342,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3263184
  counters:
    num_agent_steps_sampled: 3263184
    num_agent_steps_trained: 3263184
    num_env_steps_sampled: 3263184
    num_env_steps_trained: 3263184
  custom_metrics: {}
  date: 2022-10-15_17-54-06
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205485.70416347383
  episode_reward_mean: -212157.60214277293
  episode_reward_min: -222489.82992808786
  episodes_this_iter: 60
  episodes_total: 3252
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.206263780593872
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00044187912135384977
          model: {}
          policy_loss: 0.0001812956907087937
          total_loss: 9.999948501586914
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,51,502.358,3263184,-212158,-205486,-222490,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3327168
  counters:
    num_agent_steps_sampled: 3327168
    num_agent_steps_trained: 3327168
    num_env_steps_sampled: 3327168
    num_env_steps_trained: 3327168
  custom_metrics: {}
  date: 2022-10-15_17-54-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205785.2117677363
  episode_reward_mean: -213035.37449688095
  episode_reward_min: -254063.20921247025
  episodes_this_iter: 72
  episodes_total: 3324
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.214769124984741
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006139932083897293
          model: {}
          policy_loss: 0.0050265793688595295
          total_loss: 10.004828453063965
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,52,511.228,3327168,-213035,-205785,-254063,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3391152
  counters:
    num_agent_steps_sampled: 3391152
    num_agent_steps_trained: 3391152
    num_env_steps_sampled: 3391152
    num_env_steps_trained: 3391152
  custom_metrics: {}
  date: 2022-10-15_17-54-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205015.58719139427
  episode_reward_mean: -211825.408540887
  episode_reward_min: -220739.63583597686
  episodes_this_iter: 60
  episodes_total: 3384
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.214702844619751
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000260598782915622
          model: {}
          policy_loss: 0.007322394754737616
          total_loss: 10.007054328918457
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,53,519.757,3391152,-211825,-205016,-220740,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3455136
  counters:
    num_agent_steps_sampled: 3455136
    num_agent_steps_trained: 3455136
    num_env_steps_sampled: 3455136
    num_env_steps_trained: 3455136
  custom_metrics: {}
  date: 2022-10-15_17-54-32
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205015.58719139427
  episode_reward_mean: -210983.42617350107
  episode_reward_min: -218231.95551915755
  episodes_this_iter: 60
  episodes_total: 3444
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.207289218902588
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004114539478905499
          model: {}
          policy_loss: -0.00039842535625211895
          total_loss: 9.999364852905273
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,54,528.72,3455136,-210983,-205016,-218232,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3519120
  counters:
    num_agent_steps_sampled: 3519120
    num_agent_steps_trained: 3519120
    num_env_steps_sampled: 3519120
    num_env_steps_trained: 3519120
  custom_metrics: {}
  date: 2022-10-15_17-54-41
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203118.68321805607
  episode_reward_mean: -211019.47068532533
  episode_reward_min: -222935.03357007974
  episodes_this_iter: 72
  episodes_total: 3516
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1982550621032715
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00041645433520898223
          model: {}
          policy_loss: 0.004186750389635563
          total_loss: 10.003950119018555
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,55,537.773,3519120,-211019,-203119,-222935,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3583104
  counters:
    num_agent_steps_sampled: 3583104
    num_agent_steps_trained: 3583104
    num_env_steps_sampled: 3583104
    num_env_steps_trained: 3583104
  custom_metrics: {}
  date: 2022-10-15_17-54-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -201808.07272266195
  episode_reward_mean: -210785.8362546799
  episode_reward_min: -245124.63321021755
  episodes_this_iter: 60
  episodes_total: 3576
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1948721408843994
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004444870864972472
          model: {}
          policy_loss: 0.007085422985255718
          total_loss: 10.006854057312012
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,56,546.702,3583104,-210786,-201808,-245125,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3647088
  counters:
    num_agent_steps_sampled: 3647088
    num_agent_steps_trained: 3647088
    num_env_steps_sampled: 3647088
    num_env_steps_trained: 3647088
  custom_metrics: {}
  date: 2022-10-15_17-55-00
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203839.79857227998
  episode_reward_mean: -210978.2659158189
  episode_reward_min: -259902.21632882685
  episodes_this_iter: 60
  episodes_total: 3636
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1866345405578613
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003313217603135854
          model: {}
          policy_loss: 0.0006525729550048709
          total_loss: 10.00040054321289
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,57,555.966,3647088,-210978,-203840,-259902,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3711072
  counters:
    num_agent_steps_sampled: 3711072
    num_agent_steps_trained: 3711072
    num_env_steps_sampled: 3711072
    num_env_steps_trained: 3711072
  custom_metrics: {}
  date: 2022-10-15_17-55-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -201461.50059973146
  episode_reward_mean: -210180.51263287605
  episode_reward_min: -233506.8287634325
  episodes_this_iter: 72
  episodes_total: 3708
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.180028200149536
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005239498568698764
          model: {}
          policy_loss: 0.004522498231381178
          total_loss: 10.004308700561523
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,58,564.879,3711072,-210181,-201462,-233507,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3775056
  counters:
    num_agent_steps_sampled: 3775056
    num_agent_steps_trained: 3775056
    num_env_steps_sampled: 3775056
    num_env_steps_trained: 3775056
  custom_metrics: {}
  date: 2022-10-15_17-55-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203312.19797307035
  episode_reward_mean: -209374.6505903025
  episode_reward_min: -215670.8942806155
  episodes_this_iter: 60
  episodes_total: 3768
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1737704277038574
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0002744872763287276
          model: {}
          policy_loss: 0.00665265042334795
          total_loss: 10.006390571594238
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,59,574.047,3775056,-209375,-203312,-215671,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3839040
  counters:
    num_agent_steps_sampled: 3839040
    num_agent_steps_trained: 3839040
    num_env_steps_sampled: 3839040
    num_env_steps_trained: 3839040
  custom_metrics: {}
  date: 2022-10-15_17-55-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -201707.28689280717
  episode_reward_mean: -210576.08077530647
  episode_reward_min: -256120.8199570944
  episodes_this_iter: 60
  episodes_total: 3828
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.174952507019043
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004958385834470391
          model: {}
          policy_loss: 0.00013840523024555296
          total_loss: 9.999919891357422
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,60,583.245,3839040,-210576,-201707,-256121,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3903024
  counters:
    num_agent_steps_sampled: 3903024
    num_agent_steps_trained: 3903024
    num_env_steps_sampled: 3903024
    num_env_steps_trained: 3903024
  custom_metrics: {}
  date: 2022-10-15_17-55-36
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -201707.28689280717
  episode_reward_mean: -209576.0651266085
  episode_reward_min: -256120.8199570944
  episodes_this_iter: 72
  episodes_total: 3900
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1814608573913574
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0010279242414981127
          model: {}
          policy_loss: 0.00456530274823308
          total_loss: 10.0044527053833
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,61,592.269,3903024,-209576,-201707,-256121,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 3967008
  counters:
    num_agent_steps_sampled: 3967008
    num_agent_steps_trained: 3967008
    num_env_steps_sampled: 3967008
    num_env_steps_trained: 3967008
  custom_metrics: {}
  date: 2022-10-15_17-55-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203031.78741224817
  episode_reward_mean: -209232.53969680224
  episode_reward_min: -218036.92408632408
  episodes_this_iter: 60
  episodes_total: 3960
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.175597906112671
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000632068607956171
          model: {}
          policy_loss: 0.007625813130289316
          total_loss: 10.007433891296387
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,62,600.778,3967008,-209233,-203032,-218037,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4030992
  counters:
    num_agent_steps_sampled: 4030992
    num_agent_steps_trained: 4030992
    num_env_steps_sampled: 4030992
    num_env_steps_trained: 4030992
  custom_metrics: {}
  date: 2022-10-15_17-55-54
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -202873.63778807785
  episode_reward_mean: -208681.10574136637
  episode_reward_min: -218309.91540856118
  episodes_this_iter: 60
  episodes_total: 4020
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1752097606658936
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000434400251833722
          model: {}
          policy_loss: 0.0009986298391595483
          total_loss: 10.00076675415039
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,63,609.698,4030992,-208681,-202874,-218310,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4094976
  counters:
    num_agent_steps_sampled: 4094976
    num_agent_steps_trained: 4094976
    num_env_steps_sampled: 4094976
    num_env_steps_trained: 4094976
  custom_metrics: {}
  date: 2022-10-15_17-56-04
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -202873.63778807785
  episode_reward_mean: -208479.31249779358
  episode_reward_min: -247614.49995284187
  episodes_this_iter: 72
  episodes_total: 4092
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1698086261749268
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00035724075860343874
          model: {}
          policy_loss: 0.005069557577371597
          total_loss: 10.0048246383667
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,64,618.815,4094976,-208479,-202874,-247614,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4158960
  counters:
    num_agent_steps_sampled: 4158960
    num_agent_steps_trained: 4158960
    num_env_steps_sampled: 4158960
    num_env_steps_trained: 4158960
  custom_metrics: {}
  date: 2022-10-15_17-56-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -202020.49268645237
  episode_reward_mean: -208212.63322002982
  episode_reward_min: -247614.49995284187
  episodes_this_iter: 60
  episodes_total: 4152
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.168926239013672
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00032398023176938295
          model: {}
          policy_loss: 0.006847952958196402
          total_loss: 10.006595611572266
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,65,628.052,4158960,-208213,-202020,-247614,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4222944
  counters:
    num_agent_steps_sampled: 4222944
    num_agent_steps_trained: 4222944
    num_env_steps_sampled: 4222944
    num_env_steps_trained: 4222944
  custom_metrics: {}
  date: 2022-10-15_17-56-22
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -202881.9193436428
  episode_reward_mean: -207668.24339773544
  episode_reward_min: -213795.3228298671
  episodes_this_iter: 60
  episodes_total: 4212
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.173933744430542
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005599631113000214
          model: {}
          policy_loss: 0.000814837112557143
          total_loss: 10.000609397888184
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,66,637.036,4222944,-207668,-202882,-213795,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4286928
  counters:
    num_agent_steps_sampled: 4286928
    num_agent_steps_trained: 4286928
    num_env_steps_sampled: 4286928
    num_env_steps_trained: 4286928
  custom_metrics: {}
  date: 2022-10-15_17-56-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -201957.53488388745
  episode_reward_mean: -207954.24058263513
  episode_reward_min: -215639.8551689474
  episodes_this_iter: 72
  episodes_total: 4284
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.174314498901367
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000840349355712533
          model: {}
          policy_loss: 0.0035911151207983494
          total_loss: 10.00344181060791
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,67,646.31,4286928,-207954,-201958,-215640,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4350912
  counters:
    num_agent_steps_sampled: 4350912
    num_agent_steps_trained: 4350912
    num_env_steps_sampled: 4350912
    num_env_steps_trained: 4350912
  custom_metrics: {}
  date: 2022-10-15_17-56-40
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -202386.70511208934
  episode_reward_mean: -208851.34969061072
  episode_reward_min: -261963.46807071698
  episodes_this_iter: 60
  episodes_total: 4344
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.181178092956543
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006076385616324842
          model: {}
          policy_loss: 0.006034711841493845
          total_loss: 10.005836486816406
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,68,655.251,4350912,-208851,-202387,-261963,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4414896
  counters:
    num_agent_steps_sampled: 4414896
    num_agent_steps_trained: 4414896
    num_env_steps_sampled: 4414896
    num_env_steps_trained: 4414896
  custom_metrics: {}
  date: 2022-10-15_17-56-49
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -200032.17883427243
  episode_reward_mean: -207273.98294155343
  episode_reward_min: -216742.88772202234
  episodes_this_iter: 60
  episodes_total: 4404
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.177366018295288
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003693365433719009
          model: {}
          policy_loss: 0.0006331181502901018
          total_loss: 10.000388145446777
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,69,664.31,4414896,-207274,-200032,-216743,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4478880
  counters:
    num_agent_steps_sampled: 4478880
    num_agent_steps_trained: 4478880
    num_env_steps_sampled: 4478880
    num_env_steps_trained: 4478880
  custom_metrics: {}
  date: 2022-10-15_17-56-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -200477.65005922713
  episode_reward_mean: -207195.6106418372
  episode_reward_min: -217519.04003644868
  episodes_this_iter: 72
  episodes_total: 4476
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.167266368865967
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000252281577559188
          model: {}
          policy_loss: 0.003830237779766321
          total_loss: 10.003564834594727
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,70,673.787,4478880,-207196,-200478,-217519,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4542864
  counters:
    num_agent_steps_sampled: 4542864
    num_agent_steps_trained: 4542864
    num_env_steps_sampled: 4542864
    num_env_steps_trained: 4542864
  custom_metrics: {}
  date: 2022-10-15_17-57-08
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -200716.73290927336
  episode_reward_mean: -207211.7237501441
  episode_reward_min: -239550.58016012295
  episodes_this_iter: 60
  episodes_total: 4536
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1660404205322266
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0009987859521061182
          model: {}
          policy_loss: 0.005539108999073505
          total_loss: 10.005422592163086
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,71,683.432,4542864,-207212,-200717,-239551,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4606848
  counters:
    num_agent_steps_sampled: 4606848
    num_agent_steps_trained: 4606848
    num_env_steps_sampled: 4606848
    num_env_steps_trained: 4606848
  custom_metrics: {}
  date: 2022-10-15_17-57-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -200890.34018600624
  episode_reward_mean: -207005.36654379603
  episode_reward_min: -237495.70201472528
  episodes_this_iter: 60
  episodes_total: 4596
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.164699077606201
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000467446050606668
          model: {}
          policy_loss: 0.0008386489353142679
          total_loss: 10.000615119934082
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,72,692.726,4606848,-207005,-200890,-237496,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4670832
  counters:
    num_agent_steps_sampled: 4670832
    num_agent_steps_trained: 4670832
    num_env_steps_sampled: 4670832
    num_env_steps_trained: 4670832
  custom_metrics: {}
  date: 2022-10-15_17-57-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -200638.00236882488
  episode_reward_mean: -207485.6411149632
  episode_reward_min: -258609.28478549703
  episodes_this_iter: 72
  episodes_total: 4668
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.165470838546753
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0009907885687425733
          model: {}
          policy_loss: 0.003714491380378604
          total_loss: 10.003595352172852
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,73,702.924,4670832,-207486,-200638,-258609,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,73,702.924,4670832,-207486,-200638,-258609,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4734816
  counters:
    num_agent_steps_sampled: 4734816
    num_agent_steps_trained: 4734816
    num_env_steps_sampled: 4734816
    num_env_steps_trained: 4734816
  custom_metrics: {}
  date: 2022-10-15_17-57-38
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -200882.7375880243
  episode_reward_mean: -207635.96542270127
  episode_reward_min: -258609.28478549703
  episodes_this_iter: 60
  episodes_total: 4728
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.161393642425537
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00030503064044751227
          model: {}
          policy_loss: 0.0064501045271754265
          total_loss: 10.006194114685059
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,74,712.848,4734816,-207636,-200883,-258609,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4798800
  counters:
    num_agent_steps_sampled: 4798800
    num_agent_steps_trained: 4798800
    num_env_steps_sampled: 4798800
    num_env_steps_trained: 4798800
  custom_metrics: {}
  date: 2022-10-15_17-57-47
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -200682.21756853638
  episode_reward_mean: -206594.06918572477
  episode_reward_min: -235087.1008320407
  episodes_this_iter: 60
  episodes_total: 4788
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.162649154663086
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00043813063530251384
          model: {}
          policy_loss: 0.0012696778867393732
          total_loss: 10.0010404586792
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,75,722.356,4798800,-206594,-200682,-235087,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4862784
  counters:
    num_agent_steps_sampled: 4862784
    num_agent_steps_trained: 4862784
    num_env_steps_sampled: 4862784
    num_env_steps_trained: 4862784
  custom_metrics: {}
  date: 2022-10-15_17-57-57
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199052.90785792537
  episode_reward_mean: -205236.9559650635
  episode_reward_min: -211823.29987861638
  episodes_this_iter: 72
  episodes_total: 4860
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1603899002075195
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000551213335711509
          model: {}
          policy_loss: 0.0035223939921706915
          total_loss: 10.003315925598145
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,76,731.733,4862784,-205237,-199053,-211823,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4926768
  counters:
    num_agent_steps_sampled: 4926768
    num_agent_steps_trained: 4926768
    num_env_steps_sampled: 4926768
    num_env_steps_trained: 4926768
  custom_metrics: {}
  date: 2022-10-15_17-58-06
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199606.1946573924
  episode_reward_mean: -205468.31264879394
  episode_reward_min: -228340.49620741204
  episodes_this_iter: 60
  episodes_total: 4920
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.15058970451355
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0009552846313454211
          model: {}
          policy_loss: 0.006461989600211382
          total_loss: 10.006338119506836
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,77,740.618,4926768,-205468,-199606,-228340,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 4990752
  counters:
    num_agent_steps_sampled: 4990752
    num_agent_steps_trained: 4990752
    num_env_steps_sampled: 4990752
    num_env_steps_trained: 4990752
  custom_metrics: {}
  date: 2022-10-15_17-58-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199606.1946573924
  episode_reward_mean: -206041.46326571508
  episode_reward_min: -228340.49620741204
  episodes_this_iter: 60
  episodes_total: 4980
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1569156646728516
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006169806001707911
          model: {}
          policy_loss: 0.0014065036084502935
          total_loss: 10.001214027404785
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,78,750.083,4990752,-206041,-199606,-228340,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5054736
  counters:
    num_agent_steps_sampled: 5054736
    num_agent_steps_trained: 5054736
    num_env_steps_sampled: 5054736
    num_env_steps_trained: 5054736
  custom_metrics: {}
  date: 2022-10-15_17-58-25
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -200290.11160576128
  episode_reward_mean: -206454.1657879221
  episode_reward_min: -244240.69625734192
  episodes_this_iter: 72
  episodes_total: 5052
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.162109613418579
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003490650560706854
          model: {}
          policy_loss: 0.0037844632752239704
          total_loss: 10.00353717803955
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,79,759.71,5054736,-206454,-200290,-244241,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5118720
  counters:
    num_agent_steps_sampled: 5118720
    num_agent_steps_trained: 5118720
    num_env_steps_sampled: 5118720
    num_env_steps_trained: 5118720
  custom_metrics: {}
  date: 2022-10-15_17-58-34
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199834.0616782613
  episode_reward_mean: -206121.16536554712
  episode_reward_min: -244240.69625734192
  episodes_this_iter: 60
  episodes_total: 5112
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.160750150680542
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006136974552646279
          model: {}
          policy_loss: 0.006638089660555124
          total_loss: 10.006444931030273
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,80,769.08,5118720,-206121,-199834,-244241,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5182704
  counters:
    num_agent_steps_sampled: 5182704
    num_agent_steps_trained: 5182704
    num_env_steps_sampled: 5182704
    num_env_steps_trained: 5182704
  custom_metrics: {}
  date: 2022-10-15_17-58-44
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199304.6081685397
  episode_reward_mean: -205308.78081098758
  episode_reward_min: -211749.53843715962
  episodes_this_iter: 60
  episodes_total: 5172
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.164889097213745
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0009284801199100912
          model: {}
          policy_loss: 0.0021053410600870848
          total_loss: 10.001974105834961
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,81,778.903,5182704,-205309,-199305,-211750,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5246688
  counters:
    num_agent_steps_sampled: 5246688
    num_agent_steps_trained: 5246688
    num_env_steps_sampled: 5246688
    num_env_steps_trained: 5246688
  custom_metrics: {}
  date: 2022-10-15_17-58-54
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -200457.38040644577
  episode_reward_mean: -205212.40878358844
  episode_reward_min: -212197.46950945715
  episodes_this_iter: 72
  episodes_total: 5244
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.170206069946289
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006616329192183912
          model: {}
          policy_loss: 0.0034205836709588766
          total_loss: 10.00323486328125
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,82,788.371,5246688,-205212,-200457,-212197,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5310672
  counters:
    num_agent_steps_sampled: 5310672
    num_agent_steps_trained: 5310672
    num_env_steps_sampled: 5310672
    num_env_steps_trained: 5310672
  custom_metrics: {}
  date: 2022-10-15_17-59-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199630.0755139631
  episode_reward_mean: -205347.1548640604
  episode_reward_min: -216929.79307791026
  episodes_this_iter: 60
  episodes_total: 5304
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1748461723327637
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000989897409453988
          model: {}
          policy_loss: 0.005528916604816914
          total_loss: 10.005410194396973
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,83,797.881,5310672,-205347,-199630,-216930,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5374656
  counters:
    num_agent_steps_sampled: 5374656
    num_agent_steps_trained: 5374656
    num_env_steps_sampled: 5374656
    num_env_steps_trained: 5374656
  custom_metrics: {}
  date: 2022-10-15_17-59-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197986.91388865016
  episode_reward_mean: -205408.08145923528
  episode_reward_min: -216929.79307791026
  episodes_this_iter: 60
  episodes_total: 5364
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1730661392211914
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00028312389622442424
          model: {}
          policy_loss: 0.0008859940571710467
          total_loss: 10.000624656677246
          vf_expla

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,84,807.677,5374656,-205408,-197987,-216930,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5438640
  counters:
    num_agent_steps_sampled: 5438640
    num_agent_steps_trained: 5438640
    num_env_steps_sampled: 5438640
    num_env_steps_trained: 5438640
  custom_metrics: {}
  date: 2022-10-15_17-59-22
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197986.91388865016
  episode_reward_mean: -204443.4410009335
  episode_reward_min: -210602.09631121613
  episodes_this_iter: 72
  episodes_total: 5436
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1672050952911377
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005593117675743997
          model: {}
          policy_loss: 0.0035071438178420067
          total_loss: 10.003302574157715
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,85,817.077,5438640,-204443,-197987,-210602,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5502624
  counters:
    num_agent_steps_sampled: 5502624
    num_agent_steps_trained: 5502624
    num_env_steps_sampled: 5502624
    num_env_steps_trained: 5502624
  custom_metrics: {}
  date: 2022-10-15_17-59-32
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199916.41509120396
  episode_reward_mean: -205676.30206225548
  episode_reward_min: -245934.1166822318
  episodes_this_iter: 60
  episodes_total: 5496
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1619317531585693
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005424089031293988
          model: {}
          policy_loss: 0.00674948887899518
          total_loss: 10.006540298461914
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,86,826.423,5502624,-205676,-199916,-245934,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,86,826.423,5502624,-205676,-199916,-245934,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5566608
  counters:
    num_agent_steps_sampled: 5566608
    num_agent_steps_trained: 5566608
    num_env_steps_sampled: 5566608
    num_env_steps_trained: 5566608
  custom_metrics: {}
  date: 2022-10-15_17-59-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -200263.35977871495
  episode_reward_mean: -205873.27080879902
  episode_reward_min: -245934.1166822318
  episodes_this_iter: 60
  episodes_total: 5556
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.163386344909668
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005775976460427046
          model: {}
          policy_loss: 0.0017817853949964046
          total_loss: 10.001582145690918
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,87,837.06,5566608,-205873,-200263,-245934,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,87,837.06,5566608,-205873,-200263,-245934,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5630592
  counters:
    num_agent_steps_sampled: 5630592
    num_agent_steps_trained: 5630592
    num_env_steps_sampled: 5630592
    num_env_steps_trained: 5630592
  custom_metrics: {}
  date: 2022-10-15_17-59-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199541.4655563403
  episode_reward_mean: -205031.02766073932
  episode_reward_min: -226212.44300665898
  episodes_this_iter: 72
  episodes_total: 5628
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1645755767822266
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006093898555263877
          model: {}
          policy_loss: 0.003484750632196665
          total_loss: 10.003289222717285
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,88,847.346,5630592,-205031,-199541,-226212,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,88,847.346,5630592,-205031,-199541,-226212,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5694576
  counters:
    num_agent_steps_sampled: 5694576
    num_agent_steps_trained: 5694576
    num_env_steps_sampled: 5694576
    num_env_steps_trained: 5694576
  custom_metrics: {}
  date: 2022-10-15_18-00-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198714.20763095247
  episode_reward_mean: -204706.9049491493
  episode_reward_min: -217826.79932011658
  episodes_this_iter: 60
  episodes_total: 5688
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.164236068725586
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005631299572996795
          model: {}
          policy_loss: 0.006756358314305544
          total_loss: 10.006550788879395
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,89,857.884,5694576,-204707,-198714,-217827,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5758560
  counters:
    num_agent_steps_sampled: 5758560
    num_agent_steps_trained: 5758560
    num_env_steps_sampled: 5758560
    num_env_steps_trained: 5758560
  custom_metrics: {}
  date: 2022-10-15_18-00-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198714.20763095247
  episode_reward_mean: -204243.08680619925
  episode_reward_min: -212576.95014575298
  episodes_this_iter: 60
  episodes_total: 5748
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1601064205169678
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004892592551186681
          model: {}
          policy_loss: 0.001644402858801186
          total_loss: 10.001425743103027
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,90,866.997,5758560,-204243,-198714,-212577,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5822544
  counters:
    num_agent_steps_sampled: 5822544
    num_agent_steps_trained: 5822544
    num_env_steps_sampled: 5822544
    num_env_steps_trained: 5822544
  custom_metrics: {}
  date: 2022-10-15_18-00-22
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199725.8778179619
  episode_reward_mean: -203973.1144210914
  episode_reward_min: -212576.95014575298
  episodes_this_iter: 72
  episodes_total: 5820
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1550726890563965
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006179846823215485
          model: {}
          policy_loss: 0.00283757783472538
          total_loss: 10.002644538879395
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,91,876.363,5822544,-203973,-199726,-212577,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5886528
  counters:
    num_agent_steps_sampled: 5886528
    num_agent_steps_trained: 5886528
    num_env_steps_sampled: 5886528
    num_env_steps_trained: 5886528
  custom_metrics: {}
  date: 2022-10-15_18-00-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198360.07036493655
  episode_reward_mean: -204098.82005663638
  episode_reward_min: -209146.27942662948
  episodes_this_iter: 60
  episodes_total: 5880
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1556124687194824
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000390266184695065
          model: {}
          policy_loss: 0.006770114414393902
          total_loss: 10.006532669067383
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,92,885.734,5886528,-204099,-198360,-209146,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 5950512
  counters:
    num_agent_steps_sampled: 5950512
    num_agent_steps_trained: 5950512
    num_env_steps_sampled: 5950512
    num_env_steps_trained: 5950512
  custom_metrics: {}
  date: 2022-10-15_18-00-41
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199825.03659725838
  episode_reward_mean: -204557.07873713208
  episode_reward_min: -218219.03698395018
  episodes_this_iter: 60
  episodes_total: 5940
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.155733823776245
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006416862597689033
          model: {}
          policy_loss: 0.0009501308086328208
          total_loss: 10.000761985778809
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,93,894.91,5950512,-204557,-199825,-218219,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6014496
  counters:
    num_agent_steps_sampled: 6014496
    num_agent_steps_trained: 6014496
    num_env_steps_sampled: 6014496
    num_env_steps_trained: 6014496
  custom_metrics: {}
  date: 2022-10-15_18-00-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198708.7039161356
  episode_reward_mean: -204359.76749232912
  episode_reward_min: -218219.03698395018
  episodes_this_iter: 72
  episodes_total: 6012
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1570684909820557
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000579666462726891
          model: {}
          policy_loss: 0.003319996641948819
          total_loss: 10.003119468688965
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,94,904.373,6014496,-204360,-198709,-218219,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6078480
  counters:
    num_agent_steps_sampled: 6078480
    num_agent_steps_trained: 6078480
    num_env_steps_sampled: 6078480
    num_env_steps_trained: 6078480
  custom_metrics: {}
  date: 2022-10-15_18-01-00
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198708.7039161356
  episode_reward_mean: -204652.8286351112
  episode_reward_min: -211044.23716134846
  episodes_this_iter: 60
  episodes_total: 6072
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1542742252349854
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004743323370348662
          model: {}
          policy_loss: 0.007212648168206215
          total_loss: 10.00699234008789
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,95,914.444,6078480,-204653,-198709,-211044,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,95,914.444,6078480,-204653,-198709,-211044,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6142464
  counters:
    num_agent_steps_sampled: 6142464
    num_agent_steps_trained: 6142464
    num_env_steps_sampled: 6142464
    num_env_steps_trained: 6142464
  custom_metrics: {}
  date: 2022-10-15_18-01-10
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199812.67283125556
  episode_reward_mean: -203996.53817413948
  episode_reward_min: -209014.08072287866
  episodes_this_iter: 60
  episodes_total: 6132
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.157299280166626
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00037009239895269275
          model: {}
          policy_loss: 0.002240918343886733
          total_loss: 10.001999855041504
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,96,924.044,6142464,-203997,-199813,-209014,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6206448
  counters:
    num_agent_steps_sampled: 6206448
    num_agent_steps_trained: 6206448
    num_env_steps_sampled: 6206448
    num_env_steps_trained: 6206448
  custom_metrics: {}
  date: 2022-10-15_18-01-20
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199665.46509165905
  episode_reward_mean: -203798.7671190874
  episode_reward_min: -216912.6671203973
  episodes_this_iter: 72
  episodes_total: 6204
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.153872013092041
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004328920040279627
          model: {}
          policy_loss: 0.002410107757896185
          total_loss: 10.002179145812988
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,97,934.078,6206448,-203799,-199665,-216913,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6270432
  counters:
    num_agent_steps_sampled: 6270432
    num_agent_steps_trained: 6270432
    num_env_steps_sampled: 6270432
    num_env_steps_trained: 6270432
  custom_metrics: {}
  date: 2022-10-15_18-01-30
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199147.32871245008
  episode_reward_mean: -203887.47349808694
  episode_reward_min: -223609.04830784185
  episodes_this_iter: 60
  episodes_total: 6264
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1609907150268555
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005589912761934102
          model: {}
          policy_loss: 0.006300130393356085
          total_loss: 10.006096839904785
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,98,943.793,6270432,-203887,-199147,-223609,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6334416
  counters:
    num_agent_steps_sampled: 6334416
    num_agent_steps_trained: 6334416
    num_env_steps_sampled: 6334416
    num_env_steps_trained: 6334416
  custom_metrics: {}
  date: 2022-10-15_18-01-39
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198737.52283484826
  episode_reward_mean: -203291.27372667738
  episode_reward_min: -223609.04830784185
  episodes_this_iter: 60
  episodes_total: 6324
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1624088287353516
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00048488646280020475
          model: {}
          policy_loss: 0.001171893090941012
          total_loss: 10.00095272064209
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,99,953.386,6334416,-203291,-198738,-223609,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6398400
  counters:
    num_agent_steps_sampled: 6398400
    num_agent_steps_trained: 6398400
    num_env_steps_sampled: 6398400
    num_env_steps_trained: 6398400
  custom_metrics: {}
  date: 2022-10-15_18-01-48
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197776.74024757353
  episode_reward_mean: -203003.5358390092
  episode_reward_min: -209175.1073175916
  episodes_this_iter: 72
  episodes_total: 6396
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1600842475891113
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000675452989526093
          model: {}
          policy_loss: 0.003431726712733507
          total_loss: 10.003251075744629
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,100,962.311,6398400,-203004,-197777,-209175,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6462384
  counters:
    num_agent_steps_sampled: 6462384
    num_agent_steps_trained: 6462384
    num_env_steps_sampled: 6462384
    num_env_steps_trained: 6462384
  custom_metrics: {}
  date: 2022-10-15_18-01-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197776.74024757353
  episode_reward_mean: -203789.241668278
  episode_reward_min: -209175.1073175916
  episodes_this_iter: 60
  episodes_total: 6456
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.160029888153076
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006216650945134461
          model: {}
          policy_loss: 0.005307353567332029
          total_loss: 10.005115509033203
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,101,972.244,6462384,-203789,-197777,-209175,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,101,972.244,6462384,-203789,-197777,-209175,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6526368
  counters:
    num_agent_steps_sampled: 6526368
    num_agent_steps_trained: 6526368
    num_env_steps_sampled: 6526368
    num_env_steps_trained: 6526368
  custom_metrics: {}
  date: 2022-10-15_18-02-08
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198793.0811515486
  episode_reward_mean: -203629.0762632535
  episode_reward_min: -209659.61945730326
  episodes_this_iter: 60
  episodes_total: 6516
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1702358722686768
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00029327356605790555
          model: {}
          policy_loss: 0.002376863034442067
          total_loss: 10.002119064331055
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,102,981.772,6526368,-203629,-198793,-209660,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6590352
  counters:
    num_agent_steps_sampled: 6590352
    num_agent_steps_trained: 6590352
    num_env_steps_sampled: 6590352
    num_env_steps_trained: 6590352
  custom_metrics: {}
  date: 2022-10-15_18-02-17
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197823.33430373808
  episode_reward_mean: -202869.5076738259
  episode_reward_min: -209659.61945730326
  episodes_this_iter: 72
  episodes_total: 6588
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.170722007751465
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005642342148348689
          model: {}
          policy_loss: 0.003279130207374692
          total_loss: 10.003074645996094
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,103,990.694,6590352,-202870,-197823,-209660,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6654336
  counters:
    num_agent_steps_sampled: 6654336
    num_agent_steps_trained: 6654336
    num_env_steps_sampled: 6654336
    num_env_steps_trained: 6654336
  custom_metrics: {}
  date: 2022-10-15_18-02-26
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198024.4444429469
  episode_reward_mean: -203103.12781371613
  episode_reward_min: -236445.83985408902
  episodes_this_iter: 60
  episodes_total: 6648
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1670548915863037
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006706859567202628
          model: {}
          policy_loss: 0.007312648463994265
          total_loss: 10.007129669189453
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,104,999.838,6654336,-203103,-198024,-236446,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6718320
  counters:
    num_agent_steps_sampled: 6718320
    num_agent_steps_trained: 6718320
    num_env_steps_sampled: 6718320
    num_env_steps_trained: 6718320
  custom_metrics: {}
  date: 2022-10-15_18-02-36
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198024.4444429469
  episode_reward_mean: -202668.0148050614
  episode_reward_min: -209188.913197681
  episodes_this_iter: 60
  episodes_total: 6708
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1629066467285156
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005477445665746927
          model: {}
          policy_loss: 0.002418721094727516
          total_loss: 10.00221061706543
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,105,1009.24,6718320,-202668,-198024,-209189,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6782304
  counters:
    num_agent_steps_sampled: 6782304
    num_agent_steps_trained: 6782304
    num_env_steps_sampled: 6782304
    num_env_steps_trained: 6782304
  custom_metrics: {}
  date: 2022-10-15_18-02-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198325.721627572
  episode_reward_mean: -203083.6776374626
  episode_reward_min: -229575.18688274364
  episodes_this_iter: 72
  episodes_total: 6780
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.161491870880127
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007220992119982839
          model: {}
          policy_loss: 0.0035069428849965334
          total_loss: 10.003336906433105
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,106,1018.59,6782304,-203084,-198326,-229575,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6846288
  counters:
    num_agent_steps_sampled: 6846288
    num_agent_steps_trained: 6846288
    num_env_steps_sampled: 6846288
    num_env_steps_trained: 6846288
  custom_metrics: {}
  date: 2022-10-15_18-02-55
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198019.8611500153
  episode_reward_mean: -203301.26052771456
  episode_reward_min: -229575.18688274364
  episodes_this_iter: 60
  episodes_total: 6840
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1637654304504395
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00046325832954607904
          model: {}
          policy_loss: 0.006791442167013884
          total_loss: 10.00656795501709
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,107,1028.09,6846288,-203301,-198020,-229575,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6910272
  counters:
    num_agent_steps_sampled: 6910272
    num_agent_steps_trained: 6910272
    num_env_steps_sampled: 6910272
    num_env_steps_trained: 6910272
  custom_metrics: {}
  date: 2022-10-15_18-03-04
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198019.8611500153
  episode_reward_mean: -202723.7370126496
  episode_reward_min: -209161.98678694793
  episodes_this_iter: 60
  episodes_total: 6900
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1621546745300293
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007091466686688364
          model: {}
          policy_loss: 0.0019078647019341588
          total_loss: 10.001734733581543
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,108,1037.35,6910272,-202724,-198020,-209162,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 6974256
  counters:
    num_agent_steps_sampled: 6974256
    num_agent_steps_trained: 6974256
    num_env_steps_sampled: 6974256
    num_env_steps_trained: 6974256
  custom_metrics: {}
  date: 2022-10-15_18-03-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198596.28078520074
  episode_reward_mean: -202608.18568475195
  episode_reward_min: -209161.98678694793
  episodes_this_iter: 72
  episodes_total: 6972
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.157299041748047
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006118750898167491
          model: {}
          policy_loss: 0.003028400707989931
          total_loss: 10.00283432006836
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,109,1046.74,6974256,-202608,-198596,-209162,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7038240
  counters:
    num_agent_steps_sampled: 7038240
    num_agent_steps_trained: 7038240
    num_env_steps_sampled: 7038240
    num_env_steps_trained: 7038240
  custom_metrics: {}
  date: 2022-10-15_18-03-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198164.3052731837
  episode_reward_mean: -202485.67775966198
  episode_reward_min: -234782.98529948207
  episodes_this_iter: 60
  episodes_total: 7032
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1559624671936035
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006241085357032716
          model: {}
          policy_loss: 0.006736636161804199
          total_loss: 10.006545066833496
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,110,1055.97,7038240,-202486,-198164,-234783,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7102224
  counters:
    num_agent_steps_sampled: 7102224
    num_agent_steps_trained: 7102224
    num_env_steps_sampled: 7102224
    num_env_steps_trained: 7102224
  custom_metrics: {}
  date: 2022-10-15_18-03-32
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196496.62224666588
  episode_reward_mean: -202031.51674437933
  episode_reward_min: -234782.98529948207
  episodes_this_iter: 60
  episodes_total: 7092
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1525022983551025
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004052871372550726
          model: {}
          policy_loss: 0.002350329887121916
          total_loss: 10.002117156982422
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,111,1065.3,7102224,-202032,-196497,-234783,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7166208
  counters:
    num_agent_steps_sampled: 7166208
    num_agent_steps_trained: 7166208
    num_env_steps_sampled: 7166208
    num_env_steps_trained: 7166208
  custom_metrics: {}
  date: 2022-10-15_18-03-42
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197432.68881068332
  episode_reward_mean: -201907.03794301755
  episode_reward_min: -206463.48816313394
  episodes_this_iter: 72
  episodes_total: 7164
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.151885986328125
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004311231314204633
          model: {}
          policy_loss: 0.002612803829833865
          total_loss: 10.0023832321167
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,112,1074.69,7166208,-201907,-197433,-206463,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7230192
  counters:
    num_agent_steps_sampled: 7230192
    num_agent_steps_trained: 7230192
    num_env_steps_sampled: 7230192
    num_env_steps_trained: 7230192
  custom_metrics: {}
  date: 2022-10-15_18-03-51
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197028.03334603945
  episode_reward_mean: -201865.9339225139
  episode_reward_min: -210262.1120102955
  episodes_this_iter: 60
  episodes_total: 7224
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.156367778778076
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004146574647165835
          model: {}
          policy_loss: 0.006918126717209816
          total_loss: 10.006685256958008
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,113,1083.62,7230192,-201866,-197028,-210262,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7294176
  counters:
    num_agent_steps_sampled: 7294176
    num_agent_steps_trained: 7294176
    num_env_steps_sampled: 7294176
    num_env_steps_trained: 7294176
  custom_metrics: {}
  date: 2022-10-15_18-04-00
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197028.03334603945
  episode_reward_mean: -201453.0274302896
  episode_reward_min: -210262.1120102955
  episodes_this_iter: 60
  episodes_total: 7284
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.156071662902832
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003253307077102363
          model: {}
          policy_loss: 0.0032285405322909355
          total_loss: 10.002978324890137
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,114,1093,7294176,-201453,-197028,-210262,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7358160
  counters:
    num_agent_steps_sampled: 7358160
    num_agent_steps_trained: 7358160
    num_env_steps_sampled: 7358160
    num_env_steps_trained: 7358160
  custom_metrics: {}
  date: 2022-10-15_18-04-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196820.46437565066
  episode_reward_mean: -201521.81090403371
  episode_reward_min: -219701.79957358804
  episodes_this_iter: 72
  episodes_total: 7356
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1476569175720215
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00026212798547931015
          model: {}
          policy_loss: 0.003176466329023242
          total_loss: 10.002915382385254
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,115,1102.4,7358160,-201522,-196820,-219702,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7422144
  counters:
    num_agent_steps_sampled: 7422144
    num_agent_steps_trained: 7422144
    num_env_steps_sampled: 7422144
    num_env_steps_trained: 7422144
  custom_metrics: {}
  date: 2022-10-15_18-04-19
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196820.46437565066
  episode_reward_mean: -201444.1571800085
  episode_reward_min: -210362.1332774926
  episodes_this_iter: 60
  episodes_total: 7416
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.153381824493408
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0015040693106129766
          model: {}
          policy_loss: 0.00590597465634346
          total_loss: 10.005892753601074
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,116,1111.73,7422144,-201444,-196820,-210362,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7486128
  counters:
    num_agent_steps_sampled: 7486128
    num_agent_steps_trained: 7486128
    num_env_steps_sampled: 7486128
    num_env_steps_trained: 7486128
  custom_metrics: {}
  date: 2022-10-15_18-04-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197739.18136940358
  episode_reward_mean: -201156.35366746993
  episode_reward_min: -206851.62666571108
  episodes_this_iter: 60
  episodes_total: 7476
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1540627479553223
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005352548905648291
          model: {}
          policy_loss: 0.0035927840508520603
          total_loss: 10.00338363647461
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,117,1121.36,7486128,-201156,-197739,-206852,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7550112
  counters:
    num_agent_steps_sampled: 7550112
    num_agent_steps_trained: 7550112
    num_env_steps_sampled: 7550112
    num_env_steps_trained: 7550112
  custom_metrics: {}
  date: 2022-10-15_18-04-38
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196550.47090199013
  episode_reward_mean: -200855.68711281093
  episode_reward_min: -208364.1896946651
  episodes_this_iter: 72
  episodes_total: 7548
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.149707555770874
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0010426006047055125
          model: {}
          policy_loss: 0.002495621331036091
          total_loss: 10.002388954162598
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,118,1131,7550112,-200856,-196550,-208364,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7614096
  counters:
    num_agent_steps_sampled: 7614096
    num_agent_steps_trained: 7614096
    num_env_steps_sampled: 7614096
    num_env_steps_trained: 7614096
  custom_metrics: {}
  date: 2022-10-15_18-04-49
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196546.31287368652
  episode_reward_mean: -200816.7087770864
  episode_reward_min: -209274.33448804775
  episodes_this_iter: 60
  episodes_total: 7608
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1564886569976807
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00036395140341483057
          model: {}
          policy_loss: 0.007080391515046358
          total_loss: 10.0068359375
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,119,1141.46,7614096,-200817,-196546,-209274,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,119,1141.46,7614096,-200817,-196546,-209274,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7678080
  counters:
    num_agent_steps_sampled: 7678080
    num_agent_steps_trained: 7678080
    num_env_steps_sampled: 7678080
    num_env_steps_trained: 7678080
  custom_metrics: {}
  date: 2022-10-15_18-04-58
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196546.31287368652
  episode_reward_mean: -201048.437487677
  episode_reward_min: -207961.76101730025
  episodes_this_iter: 60
  episodes_total: 7668
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1524384021759033
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00026843350497074425
          model: {}
          policy_loss: 0.00331737007945776
          total_loss: 10.003055572509766
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,120,1150.64,7678080,-201048,-196546,-207962,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7742064
  counters:
    num_agent_steps_sampled: 7742064
    num_agent_steps_trained: 7742064
    num_env_steps_sampled: 7742064
    num_env_steps_trained: 7742064
  custom_metrics: {}
  date: 2022-10-15_18-05-07
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197545.18461481755
  episode_reward_mean: -200991.64017604568
  episode_reward_min: -207541.57125183215
  episodes_this_iter: 72
  episodes_total: 7740
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1550588607788086
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004898428451269865
          model: {}
          policy_loss: 0.003156780730932951
          total_loss: 10.002939224243164
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,121,1159.83,7742064,-200992,-197545,-207542,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7806048
  counters:
    num_agent_steps_sampled: 7806048
    num_agent_steps_trained: 7806048
    num_env_steps_sampled: 7806048
    num_env_steps_trained: 7806048
  custom_metrics: {}
  date: 2022-10-15_18-05-17
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197068.52377735902
  episode_reward_mean: -201081.3571646468
  episode_reward_min: -207541.57125183215
  episodes_this_iter: 60
  episodes_total: 7800
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.154980421066284
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0009144122595898807
          model: {}
          policy_loss: 0.00620920117944479
          total_loss: 10.00607681274414
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,122,1170.08,7806048,-201081,-197069,-207542,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,122,1170.08,7806048,-201081,-197069,-207542,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,122,1170.08,7806048,-201081,-197069,-207542,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7870032
  counters:
    num_agent_steps_sampled: 7870032
    num_agent_steps_trained: 7870032
    num_env_steps_sampled: 7870032
    num_env_steps_trained: 7870032
  custom_metrics: {}
  date: 2022-10-15_18-05-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197891.75963444795
  episode_reward_mean: -201717.62737495202
  episode_reward_min: -207894.76134957568
  episodes_this_iter: 60
  episodes_total: 7860
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1590518951416016
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00046730050235055387
          model: {}
          policy_loss: 0.0024245711974799633
          total_loss: 10.002202987670898
          vf_expla

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,123,1180.24,7870032,-201718,-197892,-207895,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7934016
  counters:
    num_agent_steps_sampled: 7934016
    num_agent_steps_trained: 7934016
    num_env_steps_sampled: 7934016
    num_env_steps_trained: 7934016
  custom_metrics: {}
  date: 2022-10-15_18-05-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196770.71465957697
  episode_reward_mean: -201837.39054080713
  episode_reward_min: -207894.76134957568
  episodes_this_iter: 72
  episodes_total: 7932
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1577701568603516
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004839367466047406
          model: {}
          policy_loss: 0.0028922853525727987
          total_loss: 10.00267219543457
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,124,1189.8,7934016,-201837,-196771,-207895,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 7998000
  counters:
    num_agent_steps_sampled: 7998000
    num_agent_steps_trained: 7998000
    num_env_steps_sampled: 7998000
    num_env_steps_trained: 7998000
  custom_metrics: {}
  date: 2022-10-15_18-05-47
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197518.11951609034
  episode_reward_mean: -201793.3957909703
  episode_reward_min: -205641.01641294654
  episodes_this_iter: 60
  episodes_total: 7992
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1551811695098877
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004603054258041084
          model: {}
          policy_loss: 0.006309158634394407
          total_loss: 10.006086349487305
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,125,1199.29,7998000,-201793,-197518,-205641,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8061984
  counters:
    num_agent_steps_sampled: 8061984
    num_agent_steps_trained: 8061984
    num_env_steps_sampled: 8061984
    num_env_steps_trained: 8061984
  custom_metrics: {}
  date: 2022-10-15_18-05-55
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197518.11951609034
  episode_reward_mean: -201881.72241288738
  episode_reward_min: -209381.8054393454
  episodes_this_iter: 60
  episodes_total: 8052
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1540634632110596
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00033239819458685815
          model: {}
          policy_loss: 0.0030983430333435535
          total_loss: 10.002850532531738
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,126,1208.13,8061984,-201882,-197518,-209382,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8125968
  counters:
    num_agent_steps_sampled: 8125968
    num_agent_steps_trained: 8125968
    num_env_steps_sampled: 8125968
    num_env_steps_trained: 8125968
  custom_metrics: {}
  date: 2022-10-15_18-06-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196127.48871397975
  episode_reward_mean: -201536.36601125833
  episode_reward_min: -206925.53649977734
  episodes_this_iter: 72
  episodes_total: 8124
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1569316387176514
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006503960466943681
          model: {}
          policy_loss: 0.002944611944258213
          total_loss: 10.002758026123047
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,127,1217.36,8125968,-201536,-196127,-206926,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8189952
  counters:
    num_agent_steps_sampled: 8189952
    num_agent_steps_trained: 8189952
    num_env_steps_sampled: 8189952
    num_env_steps_trained: 8189952
  custom_metrics: {}
  date: 2022-10-15_18-06-14
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197607.6536389721
  episode_reward_mean: -201100.70945111747
  episode_reward_min: -211511.51083855476
  episodes_this_iter: 60
  episodes_total: 8184
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.155449151992798
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005070922779850662
          model: {}
          policy_loss: 0.006142523605376482
          total_loss: 10.005928039550781
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,128,1226.64,8189952,-201101,-197608,-211512,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8253936
  counters:
    num_agent_steps_sampled: 8253936
    num_agent_steps_trained: 8253936
    num_env_steps_sampled: 8253936
    num_env_steps_trained: 8253936
  custom_metrics: {}
  date: 2022-10-15_18-06-24
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196137.00935382568
  episode_reward_mean: -201100.4420721797
  episode_reward_min: -209644.1315852862
  episodes_this_iter: 60
  episodes_total: 8244
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1574246883392334
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003572635177988559
          model: {}
          policy_loss: 0.003220645245164633
          total_loss: 10.002976417541504
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,129,1236.08,8253936,-201100,-196137,-209644,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8317920
  counters:
    num_agent_steps_sampled: 8317920
    num_agent_steps_trained: 8317920
    num_env_steps_sampled: 8317920
    num_env_steps_trained: 8317920
  custom_metrics: {}
  date: 2022-10-15_18-06-33
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196137.00935382568
  episode_reward_mean: -201991.68347359347
  episode_reward_min: -251793.0248700226
  episodes_this_iter: 72
  episodes_total: 8316
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1550159454345703
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007188533782027662
          model: {}
          policy_loss: 0.002761990064755082
          total_loss: 10.002589225769043
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,130,1245.36,8317920,-201992,-196137,-251793,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8381904
  counters:
    num_agent_steps_sampled: 8381904
    num_agent_steps_trained: 8381904
    num_env_steps_sampled: 8381904
    num_env_steps_trained: 8381904
  custom_metrics: {}
  date: 2022-10-15_18-06-42
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195213.0257981498
  episode_reward_mean: -200979.6229463314
  episode_reward_min: -251793.0248700226
  episodes_this_iter: 60
  episodes_total: 8376
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1514556407928467
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007135669584386051
          model: {}
          policy_loss: 0.005752533674240112
          total_loss: 10.00558090209961
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,131,1254.84,8381904,-200980,-195213,-251793,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8445888
  counters:
    num_agent_steps_sampled: 8445888
    num_agent_steps_trained: 8445888
    num_env_steps_sampled: 8445888
    num_env_steps_trained: 8445888
  custom_metrics: {}
  date: 2022-10-15_18-06-52
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195213.0257981498
  episode_reward_mean: -200745.9646095395
  episode_reward_min: -205696.9862178534
  episodes_this_iter: 60
  episodes_total: 8436
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1524641513824463
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00042273130384273827
          model: {}
          policy_loss: 0.003891976084560156
          total_loss: 10.003662109375
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,132,1264.18,8445888,-200746,-195213,-205697,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8509872
  counters:
    num_agent_steps_sampled: 8509872
    num_agent_steps_trained: 8509872
    num_env_steps_sampled: 8509872
    num_env_steps_trained: 8509872
  custom_metrics: {}
  date: 2022-10-15_18-07-01
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197209.6642898574
  episode_reward_mean: -200672.2312810637
  episode_reward_min: -217947.42799727025
  episodes_this_iter: 72
  episodes_total: 8508
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1527223587036133
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00038828811375424266
          model: {}
          policy_loss: 0.0029910423327237368
          total_loss: 10.002753257751465
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,133,1273.71,8509872,-200672,-197210,-217947,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8573856
  counters:
    num_agent_steps_sampled: 8573856
    num_agent_steps_trained: 8573856
    num_env_steps_sampled: 8573856
    num_env_steps_trained: 8573856
  custom_metrics: {}
  date: 2022-10-15_18-07-11
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196616.44221502315
  episode_reward_mean: -200597.50351694372
  episode_reward_min: -217947.42799727025
  episodes_this_iter: 60
  episodes_total: 8568
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.152390480041504
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004828451492357999
          model: {}
          policy_loss: 0.006824715994298458
          total_loss: 10.006606101989746
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,134,1283.09,8573856,-200598,-196616,-217947,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8637840
  counters:
    num_agent_steps_sampled: 8637840
    num_agent_steps_trained: 8637840
    num_env_steps_sampled: 8637840
    num_env_steps_trained: 8637840
  custom_metrics: {}
  date: 2022-10-15_18-07-20
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196616.44221502315
  episode_reward_mean: -200832.56751469194
  episode_reward_min: -221895.3981939953
  episodes_this_iter: 60
  episodes_total: 8628
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1501972675323486
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007326400955207646
          model: {}
          policy_loss: 0.0037965287920087576
          total_loss: 10.00362777709961
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,135,1292.45,8637840,-200833,-196616,-221895,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,135,1292.45,8637840,-200833,-196616,-221895,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8701824
  counters:
    num_agent_steps_sampled: 8701824
    num_agent_steps_trained: 8701824
    num_env_steps_sampled: 8701824
    num_env_steps_trained: 8701824
  custom_metrics: {}
  date: 2022-10-15_18-07-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195927.38547142365
  episode_reward_mean: -200511.85513266985
  episode_reward_min: -206413.76831491597
  episodes_this_iter: 72
  episodes_total: 8700
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.147284507751465
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004614510980900377
          model: {}
          policy_loss: 0.002076235366985202
          total_loss: 10.001852035522461
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,136,1302.8,8701824,-200512,-195927,-206414,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8765808
  counters:
    num_agent_steps_sampled: 8765808
    num_agent_steps_trained: 8765808
    num_env_steps_sampled: 8765808
    num_env_steps_trained: 8765808
  custom_metrics: {}
  date: 2022-10-15_18-07-40
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195891.13432284424
  episode_reward_mean: -200844.1473928474
  episode_reward_min: -252776.90730740686
  episodes_this_iter: 60
  episodes_total: 8760
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.14870548248291
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00043670920422300696
          model: {}
          policy_loss: 0.006132058799266815
          total_loss: 10.005905151367188
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,137,1311.98,8765808,-200844,-195891,-252777,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8829792
  counters:
    num_agent_steps_sampled: 8829792
    num_agent_steps_trained: 8829792
    num_env_steps_sampled: 8829792
    num_env_steps_trained: 8829792
  custom_metrics: {}
  date: 2022-10-15_18-07-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195891.13432284424
  episode_reward_mean: -201134.1605345182
  episode_reward_min: -252776.90730740686
  episodes_this_iter: 60
  episodes_total: 8820
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1414031982421875
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006363184656947851
          model: {}
          policy_loss: 0.003580055432394147
          total_loss: 10.003393173217773
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,138,1322.22,8829792,-201134,-195891,-252777,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,138,1322.22,8829792,-201134,-195891,-252777,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8893776
  counters:
    num_agent_steps_sampled: 8893776
    num_agent_steps_trained: 8893776
    num_env_steps_sampled: 8893776
    num_env_steps_trained: 8893776
  custom_metrics: {}
  date: 2022-10-15_18-08-00
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196277.7257777654
  episode_reward_mean: -200592.51555640966
  episode_reward_min: -219225.0622931547
  episodes_this_iter: 72
  episodes_total: 8892
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1418352127075195
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005130615318194032
          model: {}
          policy_loss: 0.0025880271568894386
          total_loss: 10.002375602722168
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,139,1331.92,8893776,-200593,-196278,-219225,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 8957760
  counters:
    num_agent_steps_sampled: 8957760
    num_agent_steps_trained: 8957760
    num_env_steps_sampled: 8957760
    num_env_steps_trained: 8957760
  custom_metrics: {}
  date: 2022-10-15_18-08-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196558.92419581383
  episode_reward_mean: -201331.9025039508
  episode_reward_min: -249342.57784109356
  episodes_this_iter: 60
  episodes_total: 8952
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.138327121734619
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0009994362480938435
          model: {}
          policy_loss: 0.0070093730464577675
          total_loss: 10.006895065307617
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,140,1340.8,8957760,-201332,-196559,-249343,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9021744
  counters:
    num_agent_steps_sampled: 9021744
    num_agent_steps_trained: 9021744
    num_env_steps_sampled: 9021744
    num_env_steps_trained: 9021744
  custom_metrics: {}
  date: 2022-10-15_18-08-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196558.92419581383
  episode_reward_mean: -201068.03406029544
  episode_reward_min: -253549.05869245704
  episodes_this_iter: 60
  episodes_total: 9012
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1382198333740234
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0015086818020790815
          model: {}
          policy_loss: 0.003853869391605258
          total_loss: 10.0038423538208
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,141,1350.17,9021744,-201068,-196559,-253549,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9085728
  counters:
    num_agent_steps_sampled: 9085728
    num_agent_steps_trained: 9085728
    num_env_steps_sampled: 9085728
    num_env_steps_trained: 9085728
  custom_metrics: {}
  date: 2022-10-15_18-08-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195825.43871789193
  episode_reward_mean: -201197.22080513652
  episode_reward_min: -253549.05869245704
  episodes_this_iter: 72
  episodes_total: 9084
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.138425350189209
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006485298508778214
          model: {}
          policy_loss: 0.0018385299481451511
          total_loss: 10.001653671264648
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,142,1359.38,9085728,-201197,-195825,-253549,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9149712
  counters:
    num_agent_steps_sampled: 9149712
    num_agent_steps_trained: 9149712
    num_env_steps_sampled: 9149712
    num_env_steps_trained: 9149712
  custom_metrics: {}
  date: 2022-10-15_18-08-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195778.00558932457
  episode_reward_mean: -200044.8223778415
  episode_reward_min: -233066.49587865098
  episodes_this_iter: 60
  episodes_total: 9144
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1347544193267822
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0014879789669066668
          model: {}
          policy_loss: 0.006259610410779715
          total_loss: 10.006243705749512
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,143,1368.88,9149712,-200045,-195778,-233066,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9213696
  counters:
    num_agent_steps_sampled: 9213696
    num_agent_steps_trained: 9213696
    num_env_steps_sampled: 9213696
    num_env_steps_trained: 9213696
  custom_metrics: {}
  date: 2022-10-15_18-08-46
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195092.4990336426
  episode_reward_mean: -200136.79119460727
  episode_reward_min: -223726.89740539822
  episodes_this_iter: 60
  episodes_total: 9204
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1345717906951904
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005188697250559926
          model: {}
          policy_loss: 0.004527584183961153
          total_loss: 10.004317283630371
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,144,1378.12,9213696,-200137,-195092,-223727,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9277680
  counters:
    num_agent_steps_sampled: 9277680
    num_agent_steps_trained: 9277680
    num_env_steps_sampled: 9277680
    num_env_steps_trained: 9277680
  custom_metrics: {}
  date: 2022-10-15_18-08-56
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195031.6818995292
  episode_reward_mean: -200304.0961370686
  episode_reward_min: -223726.89740539822
  episodes_this_iter: 72
  episodes_total: 9276
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1340854167938232
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00030976824928075075
          model: {}
          policy_loss: 0.0025494080036878586
          total_loss: 10.002298355102539
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,145,1387.47,9277680,-200304,-195032,-223727,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9341664
  counters:
    num_agent_steps_sampled: 9341664
    num_agent_steps_trained: 9341664
    num_env_steps_sampled: 9341664
    num_env_steps_trained: 9341664
  custom_metrics: {}
  date: 2022-10-15_18-09-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195031.6818995292
  episode_reward_mean: -200014.8175162181
  episode_reward_min: -210768.076152837
  episodes_this_iter: 60
  episodes_total: 9336
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1281704902648926
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003610054263845086
          model: {}
          policy_loss: 0.006445889826864004
          total_loss: 10.006205558776855
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,146,1397.14,9341664,-200015,-195032,-210768,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9405648
  counters:
    num_agent_steps_sampled: 9405648
    num_agent_steps_trained: 9405648
    num_env_steps_sampled: 9405648
    num_env_steps_trained: 9405648
  custom_metrics: {}
  date: 2022-10-15_18-09-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195304.71759447205
  episode_reward_mean: -200034.0637120857
  episode_reward_min: -210768.076152837
  episodes_this_iter: 60
  episodes_total: 9396
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.126720428466797
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00029550702311098576
          model: {}
          policy_loss: 0.004208127968013287
          total_loss: 10.003955841064453
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,147,1406.48,9405648,-200034,-195305,-210768,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9469632
  counters:
    num_agent_steps_sampled: 9469632
    num_agent_steps_trained: 9469632
    num_env_steps_sampled: 9469632
    num_env_steps_trained: 9469632
  custom_metrics: {}
  date: 2022-10-15_18-09-24
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195904.57990275125
  episode_reward_mean: -200433.98657499204
  episode_reward_min: -215439.3446295885
  episodes_this_iter: 72
  episodes_total: 9468
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1226818561553955
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00038026730180718005
          model: {}
          policy_loss: 0.0011159214191138744
          total_loss: 10.000880241394043
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,148,1415.93,9469632,-200434,-195905,-215439,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9533616
  counters:
    num_agent_steps_sampled: 9533616
    num_agent_steps_trained: 9533616
    num_env_steps_sampled: 9533616
    num_env_steps_trained: 9533616
  custom_metrics: {}
  date: 2022-10-15_18-09-34
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196183.11463375622
  episode_reward_mean: -200076.41838633074
  episode_reward_min: -211781.96371532168
  episodes_this_iter: 60
  episodes_total: 9528
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1254324913024902
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008939740946516395
          model: {}
          policy_loss: 0.0062369294464588165
          total_loss: 10.006103515625
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,149,1425.54,9533616,-200076,-196183,-211782,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9597600
  counters:
    num_agent_steps_sampled: 9597600
    num_agent_steps_trained: 9597600
    num_env_steps_sampled: 9597600
    num_env_steps_trained: 9597600
  custom_metrics: {}
  date: 2022-10-15_18-09-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196124.0692157312
  episode_reward_mean: -199594.9716953974
  episode_reward_min: -206332.98449027186
  episodes_this_iter: 60
  episodes_total: 9588
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.113942861557007
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0011205518385395408
          model: {}
          policy_loss: 0.003482159459963441
          total_loss: 10.003395080566406
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,150,1435,9597600,-199595,-196124,-206333,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9661584
  counters:
    num_agent_steps_sampled: 9661584
    num_agent_steps_trained: 9661584
    num_env_steps_sampled: 9661584
    num_env_steps_trained: 9661584
  custom_metrics: {}
  date: 2022-10-15_18-09-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194545.11333435818
  episode_reward_mean: -199465.18590590102
  episode_reward_min: -204855.56312661574
  episodes_this_iter: 72
  episodes_total: 9660
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.115866184234619
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006824176525697112
          model: {}
          policy_loss: 0.0018298262730240822
          total_loss: 10.001654624938965
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,151,1444.44,9661584,-199465,-194545,-204856,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9725568
  counters:
    num_agent_steps_sampled: 9725568
    num_agent_steps_trained: 9725568
    num_env_steps_sampled: 9725568
    num_env_steps_trained: 9725568
  custom_metrics: {}
  date: 2022-10-15_18-10-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194642.47309875194
  episode_reward_mean: -199542.0358511512
  episode_reward_min: -204855.56312661574
  episodes_this_iter: 60
  episodes_total: 9720
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.128398895263672
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007186588481999934
          model: {}
          policy_loss: 0.006383867468684912
          total_loss: 10.00621509552002
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,152,1453.66,9725568,-199542,-194642,-204856,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9789552
  counters:
    num_agent_steps_sampled: 9789552
    num_agent_steps_trained: 9789552
    num_env_steps_sampled: 9789552
    num_env_steps_trained: 9789552
  custom_metrics: {}
  date: 2022-10-15_18-10-11
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194642.47309875194
  episode_reward_mean: -199310.37365120318
  episode_reward_min: -209482.83940226174
  episodes_this_iter: 60
  episodes_total: 9780
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.128038167953491
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005341186770237982
          model: {}
          policy_loss: 0.003935638815164566
          total_loss: 10.003730773925781
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,153,1463.02,9789552,-199310,-194642,-209483,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9853536
  counters:
    num_agent_steps_sampled: 9853536
    num_agent_steps_trained: 9853536
    num_env_steps_sampled: 9853536
    num_env_steps_trained: 9853536
  custom_metrics: {}
  date: 2022-10-15_18-10-21
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194233.58558442918
  episode_reward_mean: -200056.7403624933
  episode_reward_min: -209999.51884579286
  episodes_this_iter: 72
  episodes_total: 9852
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.127915382385254
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007267030887305737
          model: {}
          policy_loss: 0.002065693959593773
          total_loss: 10.001895904541016
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,154,1472.37,9853536,-200057,-194234,-210000,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9917520
  counters:
    num_agent_steps_sampled: 9917520
    num_agent_steps_trained: 9917520
    num_env_steps_sampled: 9917520
    num_env_steps_trained: 9917520
  custom_metrics: {}
  date: 2022-10-15_18-10-30
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195438.5450448174
  episode_reward_mean: -200010.12247205857
  episode_reward_min: -209999.51884579286
  episodes_this_iter: 60
  episodes_total: 9912
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.121026039123535
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007987250573933125
          model: {}
          policy_loss: 0.00635549845173955
          total_loss: 10.006202697753906
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,155,1481.27,9917520,-200010,-195439,-210000,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 9981504
  counters:
    num_agent_steps_sampled: 9981504
    num_agent_steps_trained: 9981504
    num_env_steps_sampled: 9981504
    num_env_steps_trained: 9981504
  custom_metrics: {}
  date: 2022-10-15_18-10-39
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195356.6640955735
  episode_reward_mean: -199587.9754430106
  episode_reward_min: -206523.86991858747
  episodes_this_iter: 60
  episodes_total: 9972
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1175124645233154
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005672713159583509
          model: {}
          policy_loss: 0.004035801161080599
          total_loss: 10.003835678100586
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,156,1490.02,9981504,-199588,-195357,-206524,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10045488
  counters:
    num_agent_steps_sampled: 10045488
    num_agent_steps_trained: 10045488
    num_env_steps_sampled: 10045488
    num_env_steps_trained: 10045488
  custom_metrics: {}
  date: 2022-10-15_18-10-49
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195738.13533783014
  episode_reward_mean: -199321.24375281387
  episode_reward_min: -205525.17424476283
  episodes_this_iter: 72
  episodes_total: 10044
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.116053342819214
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003286139399278909
          model: {}
          policy_loss: 0.0017261338653042912
          total_loss: 10.001482009887695
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,157,1499.99,10045488,-199321,-195738,-205525,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10109472
  counters:
    num_agent_steps_sampled: 10109472
    num_agent_steps_trained: 10109472
    num_env_steps_sampled: 10109472
    num_env_steps_trained: 10109472
  custom_metrics: {}
  date: 2022-10-15_18-10-58
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195208.8152929717
  episode_reward_mean: -199284.96814004224
  episode_reward_min: -205525.17424476283
  episodes_this_iter: 60
  episodes_total: 10104
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1218769550323486
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007208576425909996
          model: {}
          policy_loss: 0.0065614921040833
          total_loss: 10.006392478942871
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,158,1509.42,10109472,-199285,-195209,-205525,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,158,1509.42,10109472,-199285,-195209,-205525,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10173456
  counters:
    num_agent_steps_sampled: 10173456
    num_agent_steps_trained: 10173456
    num_env_steps_sampled: 10173456
    num_env_steps_trained: 10173456
  custom_metrics: {}
  date: 2022-10-15_18-11-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194155.09407436397
  episode_reward_mean: -198858.55293734334
  episode_reward_min: -242361.0525847031
  episodes_this_iter: 60
  episodes_total: 10164
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.119784355163574
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007980816881172359
          model: {}
          policy_loss: 0.00438212975859642
          total_loss: 10.004228591918945
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,159,1519.71,10173456,-198859,-194155,-242361,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10237440
  counters:
    num_agent_steps_sampled: 10237440
    num_agent_steps_trained: 10237440
    num_env_steps_sampled: 10237440
    num_env_steps_trained: 10237440
  custom_metrics: {}
  date: 2022-10-15_18-11-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194453.7405588477
  episode_reward_mean: -198084.55429129858
  episode_reward_min: -202092.78432049896
  episodes_this_iter: 72
  episodes_total: 10236
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.115691900253296
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008745538652874529
          model: {}
          policy_loss: 0.0009801475098356605
          total_loss: 10.000845909118652
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,160,1529.26,10237440,-198085,-194454,-202093,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10301424
  counters:
    num_agent_steps_sampled: 10301424
    num_agent_steps_trained: 10301424
    num_env_steps_sampled: 10301424
    num_env_steps_trained: 10301424
  custom_metrics: {}
  date: 2022-10-15_18-11-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194453.7405588477
  episode_reward_mean: -198250.99709146097
  episode_reward_min: -202092.78432049896
  episodes_this_iter: 60
  episodes_total: 10296
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.11318302154541
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006486301426775753
          model: {}
          policy_loss: 0.006604866124689579
          total_loss: 10.006421089172363
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,161,1538.47,10301424,-198251,-194454,-202093,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10365408
  counters:
    num_agent_steps_sampled: 10365408
    num_agent_steps_trained: 10365408
    num_env_steps_sampled: 10365408
    num_env_steps_trained: 10365408
  custom_metrics: {}
  date: 2022-10-15_18-11-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194890.70996811907
  episode_reward_mean: -198246.60891001284
  episode_reward_min: -202369.45779289262
  episodes_this_iter: 60
  episodes_total: 10356
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1150782108306885
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0010350811062380672
          model: {}
          policy_loss: 0.004132954403758049
          total_loss: 10.004027366638184
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,162,1547.82,10365408,-198247,-194891,-202369,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10429392
  counters:
    num_agent_steps_sampled: 10429392
    num_agent_steps_trained: 10429392
    num_env_steps_sampled: 10429392
    num_env_steps_trained: 10429392
  custom_metrics: {}
  date: 2022-10-15_18-11-46
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -193896.71237851284
  episode_reward_mean: -198668.728497174
  episode_reward_min: -248903.2964942603
  episodes_this_iter: 72
  episodes_total: 10428
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.113553762435913
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003499123267829418
          model: {}
          policy_loss: 0.0019027820089831948
          total_loss: 10.00166130065918
          vf_expla

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,163,1557.22,10429392,-198669,-193897,-248903,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10493376
  counters:
    num_agent_steps_sampled: 10493376
    num_agent_steps_trained: 10493376
    num_env_steps_sampled: 10493376
    num_env_steps_trained: 10493376
  custom_metrics: {}
  date: 2022-10-15_18-11-56
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -193896.71237851284
  episode_reward_mean: -198415.31761053362
  episode_reward_min: -248903.2964942603
  episodes_this_iter: 60
  episodes_total: 10488
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1123604774475098
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000938701443374157
          model: {}
          policy_loss: 0.006767977494746447
          total_loss: 10.006644248962402
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,164,1566.61,10493376,-198415,-193897,-248903,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10557360
  counters:
    num_agent_steps_sampled: 10557360
    num_agent_steps_trained: 10557360
    num_env_steps_sampled: 10557360
    num_env_steps_trained: 10557360
  custom_metrics: {}
  date: 2022-10-15_18-12-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194021.80270128074
  episode_reward_mean: -197753.8030649573
  episode_reward_min: -202251.53184361034
  episodes_this_iter: 60
  episodes_total: 10548
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1087310314178467
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006352157215587795
          model: {}
          policy_loss: 0.004688762594014406
          total_loss: 10.004504203796387
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,165,1576.04,10557360,-197754,-194022,-202252,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10621344
  counters:
    num_agent_steps_sampled: 10621344
    num_agent_steps_trained: 10621344
    num_env_steps_sampled: 10621344
    num_env_steps_trained: 10621344
  custom_metrics: {}
  date: 2022-10-15_18-12-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194021.80270128074
  episode_reward_mean: -197487.88401411052
  episode_reward_min: -200786.876909628
  episodes_this_iter: 72
  episodes_total: 10620
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1101484298706055
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004835179715882987
          model: {}
          policy_loss: 0.0013674204237759113
          total_loss: 10.001152992248535
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,166,1585.53,10621344,-197488,-194022,-200787,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10685328
  counters:
    num_agent_steps_sampled: 10685328
    num_agent_steps_trained: 10685328
    num_env_steps_sampled: 10685328
    num_env_steps_trained: 10685328
  custom_metrics: {}
  date: 2022-10-15_18-12-24
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194131.7308952869
  episode_reward_mean: -197452.4135881642
  episode_reward_min: -201855.51595320358
  episodes_this_iter: 60
  episodes_total: 10680
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.106598138809204
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008236956782639027
          model: {}
          policy_loss: 0.006949682254344225
          total_loss: 10.006803512573242
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,167,1595.05,10685328,-197452,-194132,-201856,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10749312
  counters:
    num_agent_steps_sampled: 10749312
    num_agent_steps_trained: 10749312
    num_env_steps_sampled: 10749312
    num_env_steps_trained: 10749312
  custom_metrics: {}
  date: 2022-10-15_18-12-34
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194150.80209364172
  episode_reward_mean: -197410.25051873512
  episode_reward_min: -206428.70224828395
  episodes_this_iter: 60
  episodes_total: 10740
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1059858798980713
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005427315481938422
          model: {}
          policy_loss: 0.0049897003918886185
          total_loss: 10.00478744506836
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,168,1604.39,10749312,-197410,-194151,-206429,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10813296
  counters:
    num_agent_steps_sampled: 10813296
    num_agent_steps_trained: 10813296
    num_env_steps_sampled: 10813296
    num_env_steps_trained: 10813296
  custom_metrics: {}
  date: 2022-10-15_18-12-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -193717.56514046242
  episode_reward_mean: -197450.33692488502
  episode_reward_min: -204835.01006982411
  episodes_this_iter: 72
  episodes_total: 10812
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.103146553039551
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005081433337181807
          model: {}
          policy_loss: 0.0007324246107600629
          total_loss: 10.000523567199707
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,169,1613.71,10813296,-197450,-193718,-204835,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10877280
  counters:
    num_agent_steps_sampled: 10877280
    num_agent_steps_trained: 10877280
    num_env_steps_sampled: 10877280
    num_env_steps_trained: 10877280
  custom_metrics: {}
  date: 2022-10-15_18-12-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -193717.56514046242
  episode_reward_mean: -197764.16786628985
  episode_reward_min: -204835.01006982411
  episodes_this_iter: 60
  episodes_total: 10872
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.102799892425537
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007329023792408407
          model: {}
          policy_loss: 0.0053885700181126595
          total_loss: 10.00522518157959
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,170,1623.33,10877280,-197764,-193718,-204835,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 10941264
  counters:
    num_agent_steps_sampled: 10941264
    num_agent_steps_trained: 10941264
    num_env_steps_sampled: 10941264
    num_env_steps_trained: 10941264
  custom_metrics: {}
  date: 2022-10-15_18-13-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194725.15723291278
  episode_reward_mean: -197900.24714392747
  episode_reward_min: -202509.79910044346
  episodes_this_iter: 60
  episodes_total: 10932
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1075522899627686
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008935497608035803
          model: {}
          policy_loss: 0.0045671965926885605
          total_loss: 10.004434585571289
          vf_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,171,1632.97,10941264,-197900,-194725,-202510,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11005248
  counters:
    num_agent_steps_sampled: 11005248
    num_agent_steps_trained: 11005248
    num_env_steps_sampled: 11005248
    num_env_steps_trained: 11005248
  custom_metrics: {}
  date: 2022-10-15_18-13-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194140.19964143034
  episode_reward_mean: -197449.77418709983
  episode_reward_min: -202509.79910044346
  episodes_this_iter: 72
  episodes_total: 11004
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.105109453201294
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007301449659280479
          model: {}
          policy_loss: 0.0007539376383647323
          total_loss: 10.000590324401855
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,172,1642.41,11005248,-197450,-194140,-202510,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,172,1642.41,11005248,-197450,-194140,-202510,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11069232
  counters:
    num_agent_steps_sampled: 11069232
    num_agent_steps_trained: 11069232
    num_env_steps_sampled: 11069232
    num_env_steps_trained: 11069232
  custom_metrics: {}
  date: 2022-10-15_18-13-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194273.46175299367
  episode_reward_mean: -197445.97492250375
  episode_reward_min: -201668.41962905345
  episodes_this_iter: 60
  episodes_total: 11064
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1091549396514893
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0013092680601403117
          model: {}
          policy_loss: 0.006540374364703894
          total_loss: 10.006490707397461
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,173,1652.94,11069232,-197446,-194273,-201668,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11133216
  counters:
    num_agent_steps_sampled: 11133216
    num_agent_steps_trained: 11133216
    num_env_steps_sampled: 11133216
    num_env_steps_trained: 11133216
  custom_metrics: {}
  date: 2022-10-15_18-13-32
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194260.0265777544
  episode_reward_mean: -197263.25287114803
  episode_reward_min: -209722.713526288
  episodes_this_iter: 60
  episodes_total: 11124
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1059610843658447
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00045544959721155465
          model: {}
          policy_loss: 0.004699958488345146
          total_loss: 10.00447940826416
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,174,1662.34,11133216,-197263,-194260,-209723,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11197200
  counters:
    num_agent_steps_sampled: 11197200
    num_agent_steps_trained: 11197200
    num_env_steps_sampled: 11197200
    num_env_steps_trained: 11197200
  custom_metrics: {}
  date: 2022-10-15_18-13-42
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -193062.28838500215
  episode_reward_mean: -197055.81652635394
  episode_reward_min: -209722.713526288
  episodes_this_iter: 72
  episodes_total: 11196
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.10555100440979
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006951822433620691
          model: {}
          policy_loss: 0.0013905328232795
          total_loss: 10.001218795776367
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,175,1671.96,11197200,-197056,-193062,-209723,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11261184
  counters:
    num_agent_steps_sampled: 11261184
    num_agent_steps_trained: 11261184
    num_env_steps_sampled: 11261184
    num_env_steps_trained: 11261184
  custom_metrics: {}
  date: 2022-10-15_18-13-51
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -193925.3823797375
  episode_reward_mean: -196847.88838726503
  episode_reward_min: -200836.82592196163
  episodes_this_iter: 60
  episodes_total: 11256
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.109694004058838
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007008120301179588
          model: {}
          policy_loss: 0.006145994644612074
          total_loss: 10.005975723266602
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,176,1681.64,11261184,-196848,-193925,-200837,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,176,1681.64,11261184,-196848,-193925,-200837,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11325168
  counters:
    num_agent_steps_sampled: 11325168
    num_agent_steps_trained: 11325168
    num_env_steps_sampled: 11325168
    num_env_steps_trained: 11325168
  custom_metrics: {}
  date: 2022-10-15_18-14-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -193682.75115076028
  episode_reward_mean: -196973.68639887086
  episode_reward_min: -200814.25172319912
  episodes_this_iter: 60
  episodes_total: 11316
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0979413986206055
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006213672459125519
          model: {}
          policy_loss: 0.004271445330232382
          total_loss: 10.004085540771484
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,177,1692.24,11325168,-196974,-193683,-200814,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11389152
  counters:
    num_agent_steps_sampled: 11389152
    num_agent_steps_trained: 11389152
    num_env_steps_sampled: 11389152
    num_env_steps_trained: 11389152
  custom_metrics: {}
  date: 2022-10-15_18-14-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -193682.75115076028
  episode_reward_mean: -196590.54057402172
  episode_reward_min: -200822.56710592276
  episodes_this_iter: 72
  episodes_total: 11388
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0911412239074707
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000571546726860106
          model: {}
          policy_loss: 0.001044401084072888
          total_loss: 10.000849723815918
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,178,1702.03,11389152,-196591,-193683,-200823,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11453136
  counters:
    num_agent_steps_sampled: 11453136
    num_agent_steps_trained: 11453136
    num_env_steps_sampled: 11453136
    num_env_steps_trained: 11453136
  custom_metrics: {}
  date: 2022-10-15_18-14-21
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192613.42333202687
  episode_reward_mean: -196470.23687269428
  episode_reward_min: -204229.84357989574
  episodes_this_iter: 60
  episodes_total: 11448
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0953805446624756
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000500973837915808
          model: {}
          policy_loss: 0.0060297721065580845
          total_loss: 10.005819320678711
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,179,1711.33,11453136,-196470,-192613,-204230,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11517120
  counters:
    num_agent_steps_sampled: 11517120
    num_agent_steps_trained: 11517120
    num_env_steps_sampled: 11517120
    num_env_steps_trained: 11517120
  custom_metrics: {}
  date: 2022-10-15_18-14-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192613.42333202687
  episode_reward_mean: -196274.04277243617
  episode_reward_min: -201209.0676735488
  episodes_this_iter: 60
  episodes_total: 11508
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.096754312515259
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006385674932971597
          model: {}
          policy_loss: 0.0052358671091496944
          total_loss: 10.005053520202637
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,180,1720.71,11517120,-196274,-192613,-201209,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11581104
  counters:
    num_agent_steps_sampled: 11581104
    num_agent_steps_trained: 11581104
    num_env_steps_sampled: 11581104
    num_env_steps_trained: 11581104
  custom_metrics: {}
  date: 2022-10-15_18-14-40
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192750.12789678227
  episode_reward_mean: -196150.98039590026
  episode_reward_min: -203916.93579836545
  episodes_this_iter: 72
  episodes_total: 11580
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0952537059783936
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001088618184439838
          model: {}
          policy_loss: 0.0007209588657133281
          total_loss: 10.000630378723145
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,181,1729.74,11581104,-196151,-192750,-203917,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11645088
  counters:
    num_agent_steps_sampled: 11645088
    num_agent_steps_trained: 11645088
    num_env_steps_sampled: 11645088
    num_env_steps_trained: 11645088
  custom_metrics: {}
  date: 2022-10-15_18-14-49
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192217.07823663883
  episode_reward_mean: -195762.70697559867
  episode_reward_min: -203916.93579836545
  episodes_this_iter: 60
  episodes_total: 11640
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0959699153900146
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007323726895265281
          model: {}
          policy_loss: 0.006751253269612789
          total_loss: 10.006587028503418
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,182,1738.88,11645088,-195763,-192217,-203917,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,182,1738.88,11645088,-195763,-192217,-203917,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11709072
  counters:
    num_agent_steps_sampled: 11709072
    num_agent_steps_trained: 11709072
    num_env_steps_sampled: 11709072
    num_env_steps_trained: 11709072
  custom_metrics: {}
  date: 2022-10-15_18-14-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191972.55538363737
  episode_reward_mean: -195418.95133892226
  episode_reward_min: -200355.3907960356
  episodes_this_iter: 60
  episodes_total: 11700
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0992510318756104
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000387260370189324
          model: {}
          policy_loss: 0.0050446768291294575
          total_loss: 10.00481128692627
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,183,1748.74,11709072,-195419,-191973,-200355,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11773056
  counters:
    num_agent_steps_sampled: 11773056
    num_agent_steps_trained: 11773056
    num_env_steps_sampled: 11773056
    num_env_steps_trained: 11773056
  custom_metrics: {}
  date: 2022-10-15_18-15-08
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191839.6433560653
  episode_reward_mean: -195639.26313421875
  episode_reward_min: -205508.73034860074
  episodes_this_iter: 72
  episodes_total: 11772
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.098249673843384
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001190322102047503
          model: {}
          policy_loss: 0.00046215124893933535
          total_loss: 10.000391006469727
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,184,1758.21,11773056,-195639,-191840,-205509,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11837040
  counters:
    num_agent_steps_sampled: 11837040
    num_agent_steps_trained: 11837040
    num_env_steps_sampled: 11837040
    num_env_steps_trained: 11837040
  custom_metrics: {}
  date: 2022-10-15_18-15-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191839.6433560653
  episode_reward_mean: -195791.26605380117
  episode_reward_min: -205508.73034860074
  episodes_this_iter: 60
  episodes_total: 11832
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1020355224609375
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005152799421921372
          model: {}
          policy_loss: 0.005815837997943163
          total_loss: 10.005608558654785
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,185,1767.58,11837040,-195791,-191840,-205509,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11901024
  counters:
    num_agent_steps_sampled: 11901024
    num_agent_steps_trained: 11901024
    num_env_steps_sampled: 11901024
    num_env_steps_trained: 11901024
  custom_metrics: {}
  date: 2022-10-15_18-15-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192428.4950034164
  episode_reward_mean: -195624.3645795123
  episode_reward_min: -202124.97283254043
  episodes_this_iter: 60
  episodes_total: 11892
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.11061692237854
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00042768733692355454
          model: {}
          policy_loss: 0.005958036985248327
          total_loss: 10.005731582641602
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,186,1777.44,11901024,-195624,-192428,-202125,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,186,1777.44,11901024,-195624,-192428,-202125,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 11965008
  counters:
    num_agent_steps_sampled: 11965008
    num_agent_steps_trained: 11965008
    num_env_steps_sampled: 11965008
    num_env_steps_trained: 11965008
  custom_metrics: {}
  date: 2022-10-15_18-15-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192578.32378001034
  episode_reward_mean: -195479.7483674796
  episode_reward_min: -200035.92735656403
  episodes_this_iter: 72
  episodes_total: 11964
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.111236333847046
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006041722954250872
          model: {}
          policy_loss: 0.00038109306478872895
          total_loss: 10.000189781188965
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,187,1787.16,11965008,-195480,-192578,-200036,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12028992
  counters:
    num_agent_steps_sampled: 12028992
    num_agent_steps_trained: 12028992
    num_env_steps_sampled: 12028992
    num_env_steps_trained: 12028992
  custom_metrics: {}
  date: 2022-10-15_18-15-47
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191305.83809018278
  episode_reward_mean: -195497.742720241
  episode_reward_min: -199664.38028437362
  episodes_this_iter: 60
  episodes_total: 12024
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1203598976135254
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0010935441823676229
          model: {}
          policy_loss: 0.006155891343951225
          total_loss: 10.006062507629395
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,188,1796.49,12028992,-195498,-191306,-199664,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12092976
  counters:
    num_agent_steps_sampled: 12092976
    num_agent_steps_trained: 12092976
    num_env_steps_sampled: 12092976
    num_env_steps_trained: 12092976
  custom_metrics: {}
  date: 2022-10-15_18-15-56
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191711.63431438943
  episode_reward_mean: -195313.52547201983
  episode_reward_min: -200347.96663832798
  episodes_this_iter: 60
  episodes_total: 12084
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1197309494018555
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004982019308954477
          model: {}
          policy_loss: 0.005256341770291328
          total_loss: 10.005043983459473
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,189,1805.74,12092976,-195314,-191712,-200348,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12156960
  counters:
    num_agent_steps_sampled: 12156960
    num_agent_steps_trained: 12156960
    num_env_steps_sampled: 12156960
    num_env_steps_trained: 12156960
  custom_metrics: {}
  date: 2022-10-15_18-16-06
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191513.55423829408
  episode_reward_mean: -195271.95980850156
  episode_reward_min: -207276.50190679237
  episodes_this_iter: 72
  episodes_total: 12156
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.114741086959839
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00036535790422931314
          model: {}
          policy_loss: 0.0006613729055970907
          total_loss: 10.000422477722168
          vf_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,190,1815.31,12156960,-195272,-191514,-207277,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12220944
  counters:
    num_agent_steps_sampled: 12220944
    num_agent_steps_trained: 12220944
    num_env_steps_sampled: 12220944
    num_env_steps_trained: 12220944
  custom_metrics: {}
  date: 2022-10-15_18-16-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192168.92027916832
  episode_reward_mean: -195291.65853746873
  episode_reward_min: -207276.50190679237
  episodes_this_iter: 60
  episodes_total: 12216
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.115069627761841
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004075949836988002
          model: {}
          policy_loss: 0.006521804258227348
          total_loss: 10.006293296813965
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,191,1824.81,12220944,-195292,-192169,-207277,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12284928
  counters:
    num_agent_steps_sampled: 12284928
    num_agent_steps_trained: 12284928
    num_env_steps_sampled: 12284928
    num_env_steps_trained: 12284928
  custom_metrics: {}
  date: 2022-10-15_18-16-25
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192168.92027916832
  episode_reward_mean: -195292.23219260405
  episode_reward_min: -200766.04636785347
  episodes_this_iter: 60
  episodes_total: 12276
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1202070713043213
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005249574896879494
          model: {}
          policy_loss: 0.005800292827188969
          total_loss: 10.005593299865723
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,192,1834.8,12284928,-195292,-192169,-200766,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,192,1834.8,12284928,-195292,-192169,-200766,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12348912
  counters:
    num_agent_steps_sampled: 12348912
    num_agent_steps_trained: 12348912
    num_env_steps_sampled: 12348912
    num_env_steps_trained: 12348912
  custom_metrics: {}
  date: 2022-10-15_18-16-35
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -193071.29924243595
  episode_reward_mean: -195162.12683990842
  episode_reward_min: -201898.77272890083
  episodes_this_iter: 72
  episodes_total: 12348
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.117231845855713
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004072028968948871
          model: {}
          policy_loss: 0.00034335468080826104
          total_loss: 10.000113487243652
          vf_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,193,1844.39,12348912,-195162,-193071,-201899,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,193,1844.39,12348912,-195162,-193071,-201899,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12412896
  counters:
    num_agent_steps_sampled: 12412896
    num_agent_steps_trained: 12412896
    num_env_steps_sampled: 12412896
    num_env_steps_trained: 12412896
  custom_metrics: {}
  date: 2022-10-15_18-16-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191974.75191293945
  episode_reward_mean: -194841.13265852406
  episode_reward_min: -198389.49882416817
  episodes_this_iter: 60
  episodes_total: 12408
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1146576404571533
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004980836529284716
          model: {}
          policy_loss: 0.005655413493514061
          total_loss: 10.00544261932373
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,194,1854.94,12412896,-194841,-191975,-198389,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12476880
  counters:
    num_agent_steps_sampled: 12476880
    num_agent_steps_trained: 12476880
    num_env_steps_sampled: 12476880
    num_env_steps_trained: 12476880
  custom_metrics: {}
  date: 2022-10-15_18-16-56
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191974.75191293945
  episode_reward_mean: -195088.25552743475
  episode_reward_min: -198390.97836715638
  episodes_this_iter: 60
  episodes_total: 12468
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1122734546661377
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008813755121082067
          model: {}
          policy_loss: 0.005916025955229998
          total_loss: 10.005781173706055
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,195,1865.23,12476880,-195088,-191975,-198391,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,195,1865.23,12476880,-195088,-191975,-198391,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12540864
  counters:
    num_agent_steps_sampled: 12540864
    num_agent_steps_trained: 12540864
    num_env_steps_sampled: 12540864
    num_env_steps_trained: 12540864
  custom_metrics: {}
  date: 2022-10-15_18-17-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -190672.81684305417
  episode_reward_mean: -194839.94413951068
  episode_reward_min: -204820.6393425539
  episodes_this_iter: 72
  episodes_total: 12540
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1066508293151855
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00042972754454240203
          model: {}
          policy_loss: 0.000525987590663135
          total_loss: 10.000300407409668
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,196,1874.84,12540864,-194840,-190673,-204821,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12604848
  counters:
    num_agent_steps_sampled: 12604848
    num_agent_steps_trained: 12604848
    num_env_steps_sampled: 12604848
    num_env_steps_trained: 12604848
  custom_metrics: {}
  date: 2022-10-15_18-17-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -190672.81684305417
  episode_reward_mean: -194743.52437988532
  episode_reward_min: -204820.6393425539
  episodes_this_iter: 60
  episodes_total: 12600
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.105902910232544
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005130438366904855
          model: {}
          policy_loss: 0.005939910653978586
          total_loss: 10.005731582641602
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,197,1884.55,12604848,-194744,-190673,-204821,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12668832
  counters:
    num_agent_steps_sampled: 12668832
    num_agent_steps_trained: 12668832
    num_env_steps_sampled: 12668832
    num_env_steps_trained: 12668832
  custom_metrics: {}
  date: 2022-10-15_18-17-24
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191515.8690416077
  episode_reward_mean: -194898.1995693789
  episode_reward_min: -199049.4612761541
  episodes_this_iter: 60
  episodes_total: 12660
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1080994606018066
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006616516038775444
          model: {}
          policy_loss: 0.00533079681918025
          total_loss: 10.00515079498291
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,198,1893.93,12668832,-194898,-191516,-199049,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12732816
  counters:
    num_agent_steps_sampled: 12732816
    num_agent_steps_trained: 12732816
    num_env_steps_sampled: 12732816
    num_env_steps_trained: 12732816
  custom_metrics: {}
  date: 2022-10-15_18-17-35
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191129.81385884332
  episode_reward_mean: -194568.74090561387
  episode_reward_min: -199424.82433262374
  episodes_this_iter: 72
  episodes_total: 12732
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1053125858306885
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007091911393217742
          model: {}
          policy_loss: -0.0004779269511345774
          total_loss: 9.999351501464844
          vf_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,199,1903.99,12732816,-194569,-191130,-199425,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12796800
  counters:
    num_agent_steps_sampled: 12796800
    num_agent_steps_trained: 12796800
    num_env_steps_sampled: 12796800
    num_env_steps_trained: 12796800
  custom_metrics: {}
  date: 2022-10-15_18-17-44
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -190358.31593205556
  episode_reward_mean: -194343.25400471
  episode_reward_min: -199424.82433262374
  episodes_this_iter: 60
  episodes_total: 12792
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.098520040512085
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006763115525245667
          model: {}
          policy_loss: 0.006477045826613903
          total_loss: 10.006302833557129
          vf_expla

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,200,1913.29,12796800,-194343,-190358,-199425,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12860784
  counters:
    num_agent_steps_sampled: 12860784
    num_agent_steps_trained: 12860784
    num_env_steps_sampled: 12860784
    num_env_steps_trained: 12860784
  custom_metrics: {}
  date: 2022-10-15_18-17-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -190358.31593205556
  episode_reward_mean: -194390.67130051178
  episode_reward_min: -197560.01132858676
  episodes_this_iter: 60
  episodes_total: 12852
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0985288619995117
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007683765725232661
          model: {}
          policy_loss: 0.005664716474711895
          total_loss: 10.005508422851562
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,201,1922.65,12860784,-194391,-190358,-197560,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12924768
  counters:
    num_agent_steps_sampled: 12924768
    num_agent_steps_trained: 12924768
    num_env_steps_sampled: 12924768
    num_env_steps_trained: 12924768
  custom_metrics: {}
  date: 2022-10-15_18-18-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189790.51887089308
  episode_reward_mean: -194330.68073344324
  episode_reward_min: -197406.7233829428
  episodes_this_iter: 72
  episodes_total: 12924
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.096658706665039
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0014591491781175137
          model: {}
          policy_loss: -0.000762587005738169
          total_loss: 9.999220848083496
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,202,1931.95,12924768,-194331,-189791,-197407,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 12988752
  counters:
    num_agent_steps_sampled: 12988752
    num_agent_steps_trained: 12988752
    num_env_steps_sampled: 12988752
    num_env_steps_trained: 12988752
  custom_metrics: {}
  date: 2022-10-15_18-18-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191330.4776732859
  episode_reward_mean: -193999.67099924
  episode_reward_min: -197381.84203919704
  episodes_this_iter: 60
  episodes_total: 12984
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0964560508728027
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004325784102547914
          model: {}
          policy_loss: 0.006012150086462498
          total_loss: 10.005789756774902
          vf_expla

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,203,1941.68,12988752,-194000,-191330,-197382,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13052736
  counters:
    num_agent_steps_sampled: 13052736
    num_agent_steps_trained: 13052736
    num_env_steps_sampled: 13052736
    num_env_steps_trained: 13052736
  custom_metrics: {}
  date: 2022-10-15_18-18-22
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -190941.85988333327
  episode_reward_mean: -194039.40694359396
  episode_reward_min: -198338.3397430027
  episodes_this_iter: 60
  episodes_total: 13044
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0980165004730225
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007822811603546143
          model: {}
          policy_loss: 0.005262515041977167
          total_loss: 10.005109786987305
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,204,1951.12,13052736,-194039,-190942,-198338,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,204,1951.12,13052736,-194039,-190942,-198338,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13116720
  counters:
    num_agent_steps_sampled: 13116720
    num_agent_steps_trained: 13116720
    num_env_steps_sampled: 13116720
    num_env_steps_trained: 13116720
  custom_metrics: {}
  date: 2022-10-15_18-18-32
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -190339.3611579891
  episode_reward_mean: -194393.8742494724
  episode_reward_min: -200699.6974262095
  episodes_this_iter: 72
  episodes_total: 13116
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.09889817237854
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003223623789381236
          model: {}
          policy_loss: -0.00021210857084952295
          total_loss: 9.999543190002441
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,205,1961.49,13116720,-194394,-190339,-200700,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13180704
  counters:
    num_agent_steps_sampled: 13180704
    num_agent_steps_trained: 13180704
    num_env_steps_sampled: 13180704
    num_env_steps_trained: 13180704
  custom_metrics: {}
  date: 2022-10-15_18-18-42
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -190817.50829317654
  episode_reward_mean: -194546.60718744586
  episode_reward_min: -200699.6974262095
  episodes_this_iter: 60
  episodes_total: 13176
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.096421480178833
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004822109476663172
          model: {}
          policy_loss: 0.00585835799574852
          total_loss: 10.005646705627441
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,206,1971.49,13180704,-194547,-190818,-200700,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13244688
  counters:
    num_agent_steps_sampled: 13244688
    num_agent_steps_trained: 13244688
    num_env_steps_sampled: 13244688
    num_env_steps_trained: 13244688
  custom_metrics: {}
  date: 2022-10-15_18-18-52
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192074.05425079804
  episode_reward_mean: -194808.85372547276
  episode_reward_min: -199193.99489665977
  episodes_this_iter: 60
  episodes_total: 13236
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.104572057723999
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0012397636892274022
          model: {}
          policy_loss: 0.006068488582968712
          total_loss: 10.00600528717041
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,207,1980.74,13244688,-194809,-192074,-199194,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13308672
  counters:
    num_agent_steps_sampled: 13308672
    num_agent_steps_trained: 13308672
    num_env_steps_sampled: 13308672
    num_env_steps_trained: 13308672
  custom_metrics: {}
  date: 2022-10-15_18-19-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191758.45154312026
  episode_reward_mean: -194875.99233413607
  episode_reward_min: -200892.29090575452
  episodes_this_iter: 72
  episodes_total: 13308
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1097023487091064
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0010692079085856676
          model: {}
          policy_loss: -0.0007878641481511295
          total_loss: 9.999115943908691
          vf_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,208,1991.02,13308672,-194876,-191758,-200892,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,208,1991.02,13308672,-194876,-191758,-200892,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,208,1991.02,13308672,-194876,-191758,-200892,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13372656
  counters:
    num_agent_steps_sampled: 13372656
    num_agent_steps_trained: 13372656
    num_env_steps_sampled: 13372656
    num_env_steps_trained: 13372656
  custom_metrics: {}
  date: 2022-10-15_18-19-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191274.76427217663
  episode_reward_mean: -194560.06515234854
  episode_reward_min: -200892.29090575452
  episodes_this_iter: 60
  episodes_total: 13368
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.115571975708008
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0011492474004626274
          model: {}
          policy_loss: 0.005654740147292614
          total_loss: 10.005571365356445
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,209,2001.85,13372656,-194560,-191275,-200892,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13436640
  counters:
    num_agent_steps_sampled: 13436640
    num_agent_steps_trained: 13436640
    num_env_steps_sampled: 13436640
    num_env_steps_trained: 13436640
  custom_metrics: {}
  date: 2022-10-15_18-19-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189612.56883172152
  episode_reward_mean: -193919.1258087778
  episode_reward_min: -197372.98858889667
  episodes_this_iter: 60
  episodes_total: 13428
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1090359687805176
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007096562767401338
          model: {}
          policy_loss: 0.006337576545774937
          total_loss: 10.006168365478516
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,210,2011.49,13436640,-193919,-189613,-197373,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13500624
  counters:
    num_agent_steps_sampled: 13500624
    num_agent_steps_trained: 13500624
    num_env_steps_sampled: 13500624
    num_env_steps_trained: 13500624
  custom_metrics: {}
  date: 2022-10-15_18-19-33
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188959.25968225475
  episode_reward_mean: -193384.33150006155
  episode_reward_min: -209547.92768035337
  episodes_this_iter: 72
  episodes_total: 13500
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1107521057128906
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001654784195125103
          model: {}
          policy_loss: -0.0005165744223631918
          total_loss: 9.999503135681152
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,211,2021.51,13500624,-193384,-188959,-209548,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13564608
  counters:
    num_agent_steps_sampled: 13564608
    num_agent_steps_trained: 13564608
    num_env_steps_sampled: 13564608
    num_env_steps_trained: 13564608
  custom_metrics: {}
  date: 2022-10-15_18-19-42
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188959.25968225475
  episode_reward_mean: -193107.44597237566
  episode_reward_min: -209547.92768035337
  episodes_this_iter: 60
  episodes_total: 13560
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1143884658813477
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005735938320867717
          model: {}
          policy_loss: 0.0056685153394937515
          total_loss: 10.005472183227539
          vf_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,212,2030.97,13564608,-193107,-188959,-209548,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,212,2030.97,13564608,-193107,-188959,-209548,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13628592
  counters:
    num_agent_steps_sampled: 13628592
    num_agent_steps_trained: 13628592
    num_env_steps_sampled: 13628592
    num_env_steps_trained: 13628592
  custom_metrics: {}
  date: 2022-10-15_18-19-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189505.20723946433
  episode_reward_mean: -192800.05024884464
  episode_reward_min: -198132.63542563716
  episodes_this_iter: 60
  episodes_total: 13620
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.112642765045166
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0009246742120012641
          model: {}
          policy_loss: 0.006743175443261862
          total_loss: 10.006616592407227
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,213,2041.73,13628592,-192800,-189505,-198133,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13692576
  counters:
    num_agent_steps_sampled: 13692576
    num_agent_steps_trained: 13692576
    num_env_steps_sampled: 13692576
    num_env_steps_trained: 13692576
  custom_metrics: {}
  date: 2022-10-15_18-20-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189812.10295323376
  episode_reward_mean: -192992.14647656822
  episode_reward_min: -199423.32466750185
  episodes_this_iter: 72
  episodes_total: 13692
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1160712242126465
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0012184720253571868
          model: {}
          policy_loss: -0.0009212022996507585
          total_loss: 9.999011993408203
          vf_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,214,2051.09,13692576,-192992,-189812,-199423,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13756560
  counters:
    num_agent_steps_sampled: 13756560
    num_agent_steps_trained: 13756560
    num_env_steps_sampled: 13756560
    num_env_steps_trained: 13756560
  custom_metrics: {}
  date: 2022-10-15_18-20-11
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189981.04526945626
  episode_reward_mean: -192813.1057113224
  episode_reward_min: -196412.4040902111
  episodes_this_iter: 60
  episodes_total: 13752
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1162869930267334
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005849587614648044
          model: {}
          policy_loss: 0.0063365246169269085
          total_loss: 10.006143569946289
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,215,2059.77,13756560,-192813,-189981,-196412,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13820544
  counters:
    num_agent_steps_sampled: 13820544
    num_agent_steps_trained: 13820544
    num_env_steps_sampled: 13820544
    num_env_steps_trained: 13820544
  custom_metrics: {}
  date: 2022-10-15_18-20-20
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -190066.88790774657
  episode_reward_mean: -192721.9346567685
  episode_reward_min: -196279.73461513923
  episodes_this_iter: 60
  episodes_total: 13812
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1158719062805176
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00031377977575175464
          model: {}
          policy_loss: 0.006389504764229059
          total_loss: 10.00614070892334
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,216,2069.04,13820544,-192722,-190067,-196280,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13884528
  counters:
    num_agent_steps_sampled: 13884528
    num_agent_steps_trained: 13884528
    num_env_steps_sampled: 13884528
    num_env_steps_trained: 13884528
  custom_metrics: {}
  date: 2022-10-15_18-20-30
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189533.27401744912
  episode_reward_mean: -193103.7524107704
  episode_reward_min: -198886.97311562867
  episodes_this_iter: 72
  episodes_total: 13884
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1132659912109375
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008624577312730253
          model: {}
          policy_loss: -0.0005876872455701232
          total_loss: 9.999275207519531
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,217,2078.85,13884528,-193104,-189533,-198887,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 13948512
  counters:
    num_agent_steps_sampled: 13948512
    num_agent_steps_trained: 13948512
    num_env_steps_sampled: 13948512
    num_env_steps_trained: 13948512
  custom_metrics: {}
  date: 2022-10-15_18-20-39
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187810.11831035864
  episode_reward_mean: -193284.94011683497
  episode_reward_min: -198886.97311562867
  episodes_this_iter: 60
  episodes_total: 13944
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.107599973678589
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008235009736381471
          model: {}
          policy_loss: 0.005540563724935055
          total_loss: 10.005393981933594
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,218,2087.99,13948512,-193285,-187810,-198887,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14012496
  counters:
    num_agent_steps_sampled: 14012496
    num_agent_steps_trained: 14012496
    num_env_steps_sampled: 14012496
    num_env_steps_trained: 14012496
  custom_metrics: {}
  date: 2022-10-15_18-20-49
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187810.11831035864
  episode_reward_mean: -192896.53981270164
  episode_reward_min: -197233.62523603922
  episodes_this_iter: 60
  episodes_total: 14004
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1078598499298096
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00033316065673716366
          model: {}
          policy_loss: 0.007274224888533354
          total_loss: 10.007030487060547
          vf_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,219,2097.6,14012496,-192897,-187810,-197234,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14076480
  counters:
    num_agent_steps_sampled: 14076480
    num_agent_steps_trained: 14076480
    num_env_steps_sampled: 14076480
    num_env_steps_trained: 14076480
  custom_metrics: {}
  date: 2022-10-15_18-20-58
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189344.3800151262
  episode_reward_mean: -192671.28523637506
  episode_reward_min: -206262.92848289327
  episodes_this_iter: 72
  episodes_total: 14076
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.106194019317627
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008955756202340126
          model: {}
          policy_loss: -0.0013472585706040263
          total_loss: 9.998520851135254
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,220,2106.67,14076480,-192671,-189344,-206263,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14140464
  counters:
    num_agent_steps_sampled: 14140464
    num_agent_steps_trained: 14140464
    num_env_steps_sampled: 14140464
    num_env_steps_trained: 14140464
  custom_metrics: {}
  date: 2022-10-15_18-21-08
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188818.7086978596
  episode_reward_mean: -192510.74797347543
  episode_reward_min: -198423.19658113428
  episodes_this_iter: 60
  episodes_total: 14136
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1095130443573
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003457639249972999
          model: {}
          policy_loss: 0.006373499985784292
          total_loss: 10.00613021850586
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,221,2116.15,14140464,-192511,-188819,-198423,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14204448
  counters:
    num_agent_steps_sampled: 14204448
    num_agent_steps_trained: 14204448
    num_env_steps_sampled: 14204448
    num_env_steps_trained: 14204448
  custom_metrics: {}
  date: 2022-10-15_18-21-17
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188731.97125522874
  episode_reward_mean: -192718.42618555346
  episode_reward_min: -196245.88788381993
  episodes_this_iter: 60
  episodes_total: 14196
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.107048988342285
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004341437597759068
          model: {}
          policy_loss: 0.005856323521584272
          total_loss: 10.005632400512695
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,222,2125.78,14204448,-192718,-188732,-196246,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14268432
  counters:
    num_agent_steps_sampled: 14268432
    num_agent_steps_trained: 14268432
    num_env_steps_sampled: 14268432
    num_env_steps_trained: 14268432
  custom_metrics: {}
  date: 2022-10-15_18-21-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188731.97125522874
  episode_reward_mean: -192703.32884790708
  episode_reward_min: -198863.04719580786
  episodes_this_iter: 72
  episodes_total: 14268
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1037654876708984
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007657678215764463
          model: {}
          policy_loss: -0.0010821159230545163
          total_loss: 9.998759269714355
          vf_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,223,2135.25,14268432,-192703,-188732,-198863,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14332416
  counters:
    num_agent_steps_sampled: 14332416
    num_agent_steps_trained: 14332416
    num_env_steps_sampled: 14332416
    num_env_steps_trained: 14332416
  custom_metrics: {}
  date: 2022-10-15_18-21-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188823.17489631218
  episode_reward_mean: -192481.46958074343
  episode_reward_min: -206697.28377211024
  episodes_this_iter: 60
  episodes_total: 14328
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1077911853790283
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0011489372700452805
          model: {}
          policy_loss: 0.005456771235913038
          total_loss: 10.005374908447266
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,224,2145.1,14332416,-192481,-188823,-206697,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14396400
  counters:
    num_agent_steps_sampled: 14396400
    num_agent_steps_trained: 14396400
    num_env_steps_sampled: 14396400
    num_env_steps_trained: 14396400
  custom_metrics: {}
  date: 2022-10-15_18-21-46
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188823.17489631218
  episode_reward_mean: -192914.79786049933
  episode_reward_min: -206697.28377211024
  episodes_this_iter: 60
  episodes_total: 14388
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.106255054473877
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0009181093773804605
          model: {}
          policy_loss: 0.006457505747675896
          total_loss: 10.006331443786621
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,225,2154.26,14396400,-192915,-188823,-206697,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14460384
  counters:
    num_agent_steps_sampled: 14460384
    num_agent_steps_trained: 14460384
    num_env_steps_sampled: 14460384
    num_env_steps_trained: 14460384
  custom_metrics: {}
  date: 2022-10-15_18-21-54
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189187.5593235322
  episode_reward_mean: -192972.50492044108
  episode_reward_min: -203726.23515527378
  episodes_this_iter: 72
  episodes_total: 14460
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1053807735443115
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006671907030977309
          model: {}
          policy_loss: -0.001297987182624638
          total_loss: 9.99852466583252
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,226,2162.82,14460384,-192973,-189188,-203726,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14524368
  counters:
    num_agent_steps_sampled: 14524368
    num_agent_steps_trained: 14524368
    num_env_steps_sampled: 14524368
    num_env_steps_trained: 14524368
  custom_metrics: {}
  date: 2022-10-15_18-22-04
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189369.8331432017
  episode_reward_mean: -192436.70253924755
  episode_reward_min: -199022.56676395136
  episodes_this_iter: 60
  episodes_total: 14520
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1030800342559814
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0009126872755587101
          model: {}
          policy_loss: 0.006187467370182276
          total_loss: 10.006058692932129
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,227,2171.71,14524368,-192437,-189370,-199023,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14588352
  counters:
    num_agent_steps_sampled: 14588352
    num_agent_steps_trained: 14588352
    num_env_steps_sampled: 14588352
    num_env_steps_trained: 14588352
  custom_metrics: {}
  date: 2022-10-15_18-22-14
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189369.8331432017
  episode_reward_mean: -192264.2466671214
  episode_reward_min: -202898.54026271947
  episodes_this_iter: 60
  episodes_total: 14580
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.103538990020752
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008636124548502266
          model: {}
          policy_loss: 0.005975666455924511
          total_loss: 10.005838394165039
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,228,2181.05,14588352,-192264,-189370,-202899,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14652336
  counters:
    num_agent_steps_sampled: 14652336
    num_agent_steps_trained: 14652336
    num_env_steps_sampled: 14652336
    num_env_steps_trained: 14652336
  custom_metrics: {}
  date: 2022-10-15_18-22-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189053.95255389292
  episode_reward_mean: -192469.8876337903
  episode_reward_min: -202898.54026271947
  episodes_this_iter: 72
  episodes_total: 14652
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1015119552612305
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000926572538446635
          model: {}
          policy_loss: -0.002419488737359643
          total_loss: 9.997455596923828
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,229,2190.21,14652336,-192470,-189054,-202899,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14716320
  counters:
    num_agent_steps_sampled: 14716320
    num_agent_steps_trained: 14716320
    num_env_steps_sampled: 14716320
    num_env_steps_trained: 14716320
  custom_metrics: {}
  date: 2022-10-15_18-22-33
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189252.35264222167
  episode_reward_mean: -192544.73253258097
  episode_reward_min: -196825.56842935827
  episodes_this_iter: 60
  episodes_total: 14712
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0981693267822266
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005401559756137431
          model: {}
          policy_loss: 0.00532532949000597
          total_loss: 10.005125045776367
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,230,2200.34,14716320,-192545,-189252,-196826,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14780304
  counters:
    num_agent_steps_sampled: 14780304
    num_agent_steps_trained: 14780304
    num_env_steps_sampled: 14780304
    num_env_steps_trained: 14780304
  custom_metrics: {}
  date: 2022-10-15_18-22-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189252.35264222167
  episode_reward_mean: -192570.863228437
  episode_reward_min: -197007.6983980954
  episodes_this_iter: 60
  episodes_total: 14772
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0977134704589844
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0012515957932919264
          model: {}
          policy_loss: 0.006794318091124296
          total_loss: 10.006735801696777
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,231,2210.05,14780304,-192571,-189252,-197008,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14844288
  counters:
    num_agent_steps_sampled: 14844288
    num_agent_steps_trained: 14844288
    num_env_steps_sampled: 14844288
    num_env_steps_trained: 14844288
  custom_metrics: {}
  date: 2022-10-15_18-22-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189865.4716155863
  episode_reward_mean: -192671.23414440508
  episode_reward_min: -206526.9693160583
  episodes_this_iter: 72
  episodes_total: 14844
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.091932773590088
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005954584339633584
          model: {}
          policy_loss: -0.0021823649294674397
          total_loss: 9.997627258300781
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,232,2219.73,14844288,-192671,-189865,-206527,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14908272
  counters:
    num_agent_steps_sampled: 14908272
    num_agent_steps_trained: 14908272
    num_env_steps_sampled: 14908272
    num_env_steps_trained: 14908272
  custom_metrics: {}
  date: 2022-10-15_18-23-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189622.32485737462
  episode_reward_mean: -192817.64763421737
  episode_reward_min: -206526.9693160583
  episodes_this_iter: 60
  episodes_total: 14904
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0880234241485596
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006712102331221104
          model: {}
          policy_loss: 0.005983317736536264
          total_loss: 10.005807876586914
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,233,2229.15,14908272,-192818,-189622,-206527,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 14972256
  counters:
    num_agent_steps_sampled: 14972256
    num_agent_steps_trained: 14972256
    num_env_steps_sampled: 14972256
    num_env_steps_trained: 14972256
  custom_metrics: {}
  date: 2022-10-15_18-23-11
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189622.32485737462
  episode_reward_mean: -192658.83099376544
  episode_reward_min: -206477.6404241671
  episodes_this_iter: 60
  episodes_total: 14964
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.081996440887451
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0015067653730511665
          model: {}
          policy_loss: 0.006302772089838982
          total_loss: 10.006296157836914
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,234,2238.39,14972256,-192659,-189622,-206478,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15036240
  counters:
    num_agent_steps_sampled: 15036240
    num_agent_steps_trained: 15036240
    num_env_steps_sampled: 15036240
    num_env_steps_trained: 15036240
  custom_metrics: {}
  date: 2022-10-15_18-23-20
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189013.85954612147
  episode_reward_mean: -192261.91665790774
  episode_reward_min: -195431.4039932223
  episodes_this_iter: 72
  episodes_total: 15036
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.082390308380127
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003391635837033391
          model: {}
          policy_loss: -0.0025860071182250977
          total_loss: 9.997173309326172
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,235,2247.61,15036240,-192262,-189014,-195431,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15100224
  counters:
    num_agent_steps_sampled: 15100224
    num_agent_steps_trained: 15100224
    num_env_steps_sampled: 15100224
    num_env_steps_trained: 15100224
  custom_metrics: {}
  date: 2022-10-15_18-23-30
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188293.3607726563
  episode_reward_mean: -192077.73124517195
  episode_reward_min: -202618.26805136085
  episodes_this_iter: 60
  episodes_total: 15096
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.083049774169922
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00093724305042997
          model: {}
          policy_loss: 0.00584028847515583
          total_loss: 10.005719184875488
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,236,2257.21,15100224,-192078,-188293,-202618,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15164208
  counters:
    num_agent_steps_sampled: 15164208
    num_agent_steps_trained: 15164208
    num_env_steps_sampled: 15164208
    num_env_steps_trained: 15164208
  custom_metrics: {}
  date: 2022-10-15_18-23-39
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188293.3607726563
  episode_reward_mean: -192180.75370381266
  episode_reward_min: -202618.26805136085
  episodes_this_iter: 60
  episodes_total: 15156
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0901057720184326
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000539999979082495
          model: {}
          policy_loss: 0.007277295924723148
          total_loss: 10.007076263427734
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,237,2266.52,15164208,-192181,-188293,-202618,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15228192
  counters:
    num_agent_steps_sampled: 15228192
    num_agent_steps_trained: 15228192
    num_env_steps_sampled: 15228192
    num_env_steps_trained: 15228192
  custom_metrics: {}
  date: 2022-10-15_18-23-49
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189374.9036425284
  episode_reward_mean: -192983.8574584805
  episode_reward_min: -202930.91272268034
  episodes_this_iter: 72
  episodes_total: 15228
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.093052625656128
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003109672397840768
          model: {}
          policy_loss: -0.0018308745929971337
          total_loss: 9.997922897338867
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,238,2275.59,15228192,-192984,-189375,-202931,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15292176
  counters:
    num_agent_steps_sampled: 15292176
    num_agent_steps_trained: 15292176
    num_env_steps_sampled: 15292176
    num_env_steps_trained: 15292176
  custom_metrics: {}
  date: 2022-10-15_18-23-58
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189532.6295005331
  episode_reward_mean: -192872.162186988
  episode_reward_min: -202930.91272268034
  episodes_this_iter: 60
  episodes_total: 15288
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.093376636505127
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000506012060213834
          model: {}
          policy_loss: 0.005773572251200676
          total_loss: 10.00556468963623
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,239,2284.68,15292176,-192872,-189533,-202931,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15356160
  counters:
    num_agent_steps_sampled: 15356160
    num_agent_steps_trained: 15356160
    num_env_steps_sampled: 15356160
    num_env_steps_trained: 15356160
  custom_metrics: {}
  date: 2022-10-15_18-24-07
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189532.6295005331
  episode_reward_mean: -193239.71802578936
  episode_reward_min: -213502.68881188537
  episodes_this_iter: 60
  episodes_total: 15348
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0966298580169678
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006053762626834214
          model: {}
          policy_loss: 0.007457571569830179
          total_loss: 10.007267951965332
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,240,2294.09,15356160,-193240,-189533,-213503,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15420144
  counters:
    num_agent_steps_sampled: 15420144
    num_agent_steps_trained: 15420144
    num_env_steps_sampled: 15420144
    num_env_steps_trained: 15420144
  custom_metrics: {}
  date: 2022-10-15_18-24-16
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189972.3279768097
  episode_reward_mean: -193483.96363448908
  episode_reward_min: -209951.13890434123
  episodes_this_iter: 72
  episodes_total: 15420
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.095569372177124
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007679268601350486
          model: {}
          policy_loss: -0.002130345441401005
          total_loss: 9.997714042663574
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,241,2302.85,15420144,-193484,-189972,-209951,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15484128
  counters:
    num_agent_steps_sampled: 15484128
    num_agent_steps_trained: 15484128
    num_env_steps_sampled: 15484128
    num_env_steps_trained: 15484128
  custom_metrics: {}
  date: 2022-10-15_18-24-24
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188463.71212142735
  episode_reward_mean: -192705.68110771247
  episode_reward_min: -204000.17875187894
  episodes_this_iter: 60
  episodes_total: 15480
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.097053050994873
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007225694134831429
          model: {}
          policy_loss: 0.00527820223942399
          total_loss: 10.005112648010254
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,242,2311.21,15484128,-192706,-188464,-204000,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15548112
  counters:
    num_agent_steps_sampled: 15548112
    num_agent_steps_trained: 15548112
    num_env_steps_sampled: 15548112
    num_env_steps_trained: 15548112
  custom_metrics: {}
  date: 2022-10-15_18-24-34
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188463.71212142735
  episode_reward_mean: -192358.66763395813
  episode_reward_min: -210069.4859653056
  episodes_this_iter: 60
  episodes_total: 15540
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.096004009246826
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005404495750553906
          model: {}
          policy_loss: 0.006578435190021992
          total_loss: 10.006376266479492
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,243,2320.77,15548112,-192359,-188464,-210069,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15612096
  counters:
    num_agent_steps_sampled: 15612096
    num_agent_steps_trained: 15612096
    num_env_steps_sampled: 15612096
    num_env_steps_trained: 15612096
  custom_metrics: {}
  date: 2022-10-15_18-24-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188424.29088110686
  episode_reward_mean: -191674.2251958378
  episode_reward_min: -198485.97770636537
  episodes_this_iter: 72
  episodes_total: 15612
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.096067190170288
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0009105291683226824
          model: {}
          policy_loss: -0.0020247576758265495
          total_loss: 9.997847557067871
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,244,2330.12,15612096,-191674,-188424,-198486,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15676080
  counters:
    num_agent_steps_sampled: 15676080
    num_agent_steps_trained: 15676080
    num_env_steps_sampled: 15676080
    num_env_steps_trained: 15676080
  custom_metrics: {}
  date: 2022-10-15_18-24-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187705.03523260445
  episode_reward_mean: -191598.599337599
  episode_reward_min: -200634.92864797765
  episodes_this_iter: 60
  episodes_total: 15672
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.099968671798706
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00048700885963626206
          model: {}
          policy_loss: 0.0059179989621043205
          total_loss: 10.005705833435059
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,245,2339.26,15676080,-191599,-187705,-200635,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15740064
  counters:
    num_agent_steps_sampled: 15740064
    num_agent_steps_trained: 15740064
    num_env_steps_sampled: 15740064
    num_env_steps_trained: 15740064
  custom_metrics: {}
  date: 2022-10-15_18-25-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187705.03523260445
  episode_reward_mean: -191083.73689303015
  episode_reward_min: -197892.99919845044
  episodes_this_iter: 60
  episodes_total: 15732
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1021220684051514
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007320968434214592
          model: {}
          policy_loss: 0.0071622831746935844
          total_loss: 10.006999969482422
          vf_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,246,2349.05,15740064,-191084,-187705,-197893,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,246,2349.05,15740064,-191084,-187705,-197893,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15804048
  counters:
    num_agent_steps_sampled: 15804048
    num_agent_steps_trained: 15804048
    num_env_steps_sampled: 15804048
    num_env_steps_trained: 15804048
  custom_metrics: {}
  date: 2022-10-15_18-25-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187116.2527508529
  episode_reward_mean: -191672.7193689028
  episode_reward_min: -217065.1196443312
  episodes_this_iter: 72
  episodes_total: 15804
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1026830673217773
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0003826653119176626
          model: {}
          policy_loss: -0.0028248976450413465
          total_loss: 9.996941566467285
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,247,2358.46,15804048,-191673,-187116,-217065,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15868032
  counters:
    num_agent_steps_sampled: 15868032
    num_agent_steps_trained: 15868032
    num_env_steps_sampled: 15868032
    num_env_steps_trained: 15868032
  custom_metrics: {}
  date: 2022-10-15_18-25-22
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187867.36122460547
  episode_reward_mean: -191599.72991320622
  episode_reward_min: -217065.1196443312
  episodes_this_iter: 60
  episodes_total: 15864
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.100210189819336
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008237868314608932
          model: {}
          policy_loss: 0.005467996932566166
          total_loss: 10.00532341003418
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,248,2368.11,15868032,-191600,-187867,-217065,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15932016
  counters:
    num_agent_steps_sampled: 15932016
    num_agent_steps_trained: 15932016
    num_env_steps_sampled: 15932016
    num_env_steps_trained: 15932016
  custom_metrics: {}
  date: 2022-10-15_18-25-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187867.36122460547
  episode_reward_mean: -191806.64537870124
  episode_reward_min: -214549.13058164722
  episodes_this_iter: 60
  episodes_total: 15924
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.103611707687378
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00046999819460324943
          model: {}
          policy_loss: 0.007467182818800211
          total_loss: 10.00725269317627
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,249,2377.32,15932016,-191807,-187867,-214549,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 15996000
  counters:
    num_agent_steps_sampled: 15996000
    num_agent_steps_trained: 15996000
    num_env_steps_sampled: 15996000
    num_env_steps_trained: 15996000
  custom_metrics: {}
  date: 2022-10-15_18-25-40
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188516.8140987884
  episode_reward_mean: -191868.9426431584
  episode_reward_min: -214549.13058164722
  episodes_this_iter: 72
  episodes_total: 15996
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.107570171356201
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008016292122192681
          model: {}
          policy_loss: -0.0038058862555772066
          total_loss: 9.996042251586914
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,250,2386.86,15996000,-191869,-188517,-214549,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16059984
  counters:
    num_agent_steps_sampled: 16059984
    num_agent_steps_trained: 16059984
    num_env_steps_sampled: 16059984
    num_env_steps_trained: 16059984
  custom_metrics: {}
  date: 2022-10-15_18-25-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187793.06285268904
  episode_reward_mean: -191463.09912777078
  episode_reward_min: -203542.1811713846
  episodes_this_iter: 60
  episodes_total: 16056
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1066107749938965
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00038216603570617735
          model: {}
          policy_loss: 0.006209576036781073
          total_loss: 10.005975723266602
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,251,2396.49,16059984,-191463,-187793,-203542,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16123968
  counters:
    num_agent_steps_sampled: 16123968
    num_agent_steps_trained: 16123968
    num_env_steps_sampled: 16123968
    num_env_steps_trained: 16123968
  custom_metrics: {}
  date: 2022-10-15_18-25-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188009.04607286467
  episode_reward_mean: -191603.4590843967
  episode_reward_min: -201486.06732959877
  episodes_this_iter: 60
  episodes_total: 16116
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.112828254699707
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005847421707585454
          model: {}
          policy_loss: 0.007334425579756498
          total_loss: 10.00714111328125
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,252,2405.71,16123968,-191603,-188009,-201486,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16187952
  counters:
    num_agent_steps_sampled: 16187952
    num_agent_steps_trained: 16187952
    num_env_steps_sampled: 16187952
    num_env_steps_trained: 16187952
  custom_metrics: {}
  date: 2022-10-15_18-26-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187784.8818006754
  episode_reward_mean: -191454.48837430333
  episode_reward_min: -202895.1952958097
  episodes_this_iter: 60
  episodes_total: 16176
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1097919940948486
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005899094394408166
          model: {}
          policy_loss: -0.0019087294349446893
          total_loss: 9.997897148132324
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,253,2415.28,16187952,-191454,-187785,-202895,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16251936
  counters:
    num_agent_steps_sampled: 16251936
    num_agent_steps_trained: 16251936
    num_env_steps_sampled: 16251936
    num_env_steps_trained: 16251936
  custom_metrics: {}
  date: 2022-10-15_18-26-19
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187377.4362335157
  episode_reward_mean: -190995.93676351573
  episode_reward_min: -194695.5999316879
  episodes_this_iter: 72
  episodes_total: 16248
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1023104190826416
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008675282006151974
          model: {}
          policy_loss: 0.00583320576697588
          total_loss: 10.005695343017578
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,254,2425.16,16251936,-190996,-187377,-194696,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16315920
  counters:
    num_agent_steps_sampled: 16315920
    num_agent_steps_trained: 16315920
    num_env_steps_sampled: 16315920
    num_env_steps_trained: 16315920
  custom_metrics: {}
  date: 2022-10-15_18-26-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187632.988013471
  episode_reward_mean: -191269.42957787093
  episode_reward_min: -195365.2912474732
  episodes_this_iter: 60
  episodes_total: 16308
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0994551181793213
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00036119521246291697
          model: {}
          policy_loss: 0.008246379904448986
          total_loss: 10.008008003234863
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,255,2434.18,16315920,-191269,-187633,-195365,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16379904
  counters:
    num_agent_steps_sampled: 16379904
    num_agent_steps_trained: 16379904
    num_env_steps_sampled: 16379904
    num_env_steps_trained: 16379904
  custom_metrics: {}
  date: 2022-10-15_18-26-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188658.389340284
  episode_reward_mean: -191704.50145343563
  episode_reward_min: -214803.66950031684
  episodes_this_iter: 60
  episodes_total: 16368
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1011369228363037
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005535752279683948
          model: {}
          policy_loss: -0.002943991683423519
          total_loss: 9.996855735778809
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,256,2443,16379904,-191705,-188658,-214804,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16443888
  counters:
    num_agent_steps_sampled: 16443888
    num_agent_steps_trained: 16443888
    num_env_steps_sampled: 16443888
    num_env_steps_trained: 16443888
  custom_metrics: {}
  date: 2022-10-15_18-26-46
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188379.64122692836
  episode_reward_mean: -191570.6289785161
  episode_reward_min: -196663.08085199093
  episodes_this_iter: 72
  episodes_total: 16440
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.097628355026245
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008482947014272213
          model: {}
          policy_loss: 0.005354011431336403
          total_loss: 10.005212783813477
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,257,2451.92,16443888,-191571,-188380,-196663,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16507872
  counters:
    num_agent_steps_sampled: 16507872
    num_agent_steps_trained: 16507872
    num_env_steps_sampled: 16507872
    num_env_steps_trained: 16507872
  custom_metrics: {}
  date: 2022-10-15_18-26-54
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187301.04551211922
  episode_reward_mean: -191088.1992992248
  episode_reward_min: -208373.06431044333
  episodes_this_iter: 60
  episodes_total: 16500
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0961673259735107
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006354321376420557
          model: {}
          policy_loss: 0.007147474214434624
          total_loss: 10.006964683532715
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,258,2460.22,16507872,-191088,-187301,-208373,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16571856
  counters:
    num_agent_steps_sampled: 16571856
    num_agent_steps_trained: 16571856
    num_env_steps_sampled: 16571856
    num_env_steps_trained: 16571856
  custom_metrics: {}
  date: 2022-10-15_18-27-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187301.04551211922
  episode_reward_mean: -190875.5485910245
  episode_reward_min: -208373.06431044333
  episodes_this_iter: 60
  episodes_total: 16560
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0956180095672607
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0012653011362999678
          model: {}
          policy_loss: -0.003222078550606966
          total_loss: 9.996719360351562
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,259,2469.09,16571856,-190876,-187301,-208373,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16635840
  counters:
    num_agent_steps_sampled: 16635840
    num_agent_steps_trained: 16635840
    num_env_steps_sampled: 16635840
    num_env_steps_trained: 16635840
  custom_metrics: {}
  date: 2022-10-15_18-27-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188260.14604115183
  episode_reward_mean: -190969.57313141134
  episode_reward_min: -194987.90972264652
  episodes_this_iter: 72
  episodes_total: 16632
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0987935066223145
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006070888484828174
          model: {}
          policy_loss: 0.005722902715206146
          total_loss: 10.005536079406738
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,260,2477.86,16635840,-190970,-188260,-194988,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16699824
  counters:
    num_agent_steps_sampled: 16699824
    num_agent_steps_trained: 16699824
    num_env_steps_sampled: 16699824
    num_env_steps_trained: 16699824
  custom_metrics: {}
  date: 2022-10-15_18-27-21
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187920.04311409296
  episode_reward_mean: -191089.04262701207
  episode_reward_min: -194987.90972264652
  episodes_this_iter: 60
  episodes_total: 16692
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.098416805267334
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001287752646021545
          model: {}
          policy_loss: 0.007057718466967344
          total_loss: 10.00700569152832
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,261,2486.83,16699824,-191089,-187920,-194988,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16763808
  counters:
    num_agent_steps_sampled: 16763808
    num_agent_steps_trained: 16763808
    num_env_steps_sampled: 16763808
    num_env_steps_trained: 16763808
  custom_metrics: {}
  date: 2022-10-15_18-27-30
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187920.04311409296
  episode_reward_mean: -191283.85295161113
  episode_reward_min: -194961.41267160588
  episodes_this_iter: 60
  episodes_total: 16752
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.098369598388672
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00048196440911851823
          model: {}
          policy_loss: -0.0028006925713270903
          total_loss: 9.996986389160156
          vf_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,262,2495.9,16763808,-191284,-187920,-194961,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16827792
  counters:
    num_agent_steps_sampled: 16827792
    num_agent_steps_trained: 16827792
    num_env_steps_sampled: 16827792
    num_env_steps_trained: 16827792
  custom_metrics: {}
  date: 2022-10-15_18-27-39
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188307.6138222996
  episode_reward_mean: -191039.45011342503
  episode_reward_min: -194873.8344055902
  episodes_this_iter: 72
  episodes_total: 16824
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.103433609008789
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008090636692941189
          model: {}
          policy_loss: 0.005000079981982708
          total_loss: 10.004853248596191
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,263,2504.71,16827792,-191039,-188308,-194874,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16891776
  counters:
    num_agent_steps_sampled: 16891776
    num_agent_steps_trained: 16891776
    num_env_steps_sampled: 16891776
    num_env_steps_trained: 16891776
  custom_metrics: {}
  date: 2022-10-15_18-27-48
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187624.21260844558
  episode_reward_mean: -190945.99149769588
  episode_reward_min: -196282.30776097908
  episodes_this_iter: 60
  episodes_total: 16884
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1100664138793945
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004346906498540193
          model: {}
          policy_loss: 0.007517187390476465
          total_loss: 10.007294654846191
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,264,2513.61,16891776,-190946,-187624,-196282,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 16955760
  counters:
    num_agent_steps_sampled: 16955760
    num_agent_steps_trained: 16955760
    num_env_steps_sampled: 16955760
    num_env_steps_trained: 16955760
  custom_metrics: {}
  date: 2022-10-15_18-27-57
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -186993.18522944787
  episode_reward_mean: -191432.88737909627
  episode_reward_min: -200979.12210712573
  episodes_this_iter: 60
  episodes_total: 16944
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1105480194091797
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001076370826922357
          model: {}
          policy_loss: -0.0028470042161643505
          total_loss: 9.997057914733887
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,265,2522.45,16955760,-191433,-186993,-200979,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17019744
  counters:
    num_agent_steps_sampled: 17019744
    num_agent_steps_trained: 17019744
    num_env_steps_sampled: 17019744
    num_env_steps_trained: 17019744
  custom_metrics: {}
  date: 2022-10-15_18-28-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187183.05105843776
  episode_reward_mean: -191101.25061797816
  episode_reward_min: -204842.10914266703
  episodes_this_iter: 72
  episodes_total: 17016
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1081244945526123
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0004215535009279847
          model: {}
          policy_loss: 0.005701756104826927
          total_loss: 10.005475044250488
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,266,2530.79,17019744,-191101,-187183,-204842,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17083728
  counters:
    num_agent_steps_sampled: 17083728
    num_agent_steps_trained: 17083728
    num_env_steps_sampled: 17083728
    num_env_steps_trained: 17083728
  custom_metrics: {}
  date: 2022-10-15_18-28-14
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187555.56245429098
  episode_reward_mean: -190924.98016516413
  episode_reward_min: -204842.10914266703
  episodes_this_iter: 60
  episodes_total: 17076
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1079256534576416
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007845538784749806
          model: {}
          policy_loss: 0.007121074013411999
          total_loss: 10.006967544555664
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,267,2539.67,17083728,-190925,-187556,-204842,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17147712
  counters:
    num_agent_steps_sampled: 17147712
    num_agent_steps_trained: 17147712
    num_env_steps_sampled: 17147712
    num_env_steps_trained: 17147712
  custom_metrics: {}
  date: 2022-10-15_18-28-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187604.66541026128
  episode_reward_mean: -190518.41563898805
  episode_reward_min: -195137.6115650324
  episodes_this_iter: 60
  episodes_total: 17136
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.108135223388672
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008501014672219753
          model: {}
          policy_loss: -0.0023941497784107924
          total_loss: 9.997465133666992
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,268,2548.67,17147712,-190518,-187605,-195138,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17211696
  counters:
    num_agent_steps_sampled: 17211696
    num_agent_steps_trained: 17211696
    num_env_steps_sampled: 17211696
    num_env_steps_trained: 17211696
  custom_metrics: {}
  date: 2022-10-15_18-28-32
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -186608.0089799702
  episode_reward_mean: -190655.47337622524
  episode_reward_min: -208064.88635224933
  episodes_this_iter: 72
  episodes_total: 17208
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1050615310668945
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0016200905665755272
          model: {}
          policy_loss: 0.0052316621877253056
          total_loss: 10.005244255065918
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,269,2557.59,17211696,-190655,-186608,-208065,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17275680
  counters:
    num_agent_steps_sampled: 17275680
    num_agent_steps_trained: 17275680
    num_env_steps_sampled: 17275680
    num_env_steps_trained: 17275680
  custom_metrics: {}
  date: 2022-10-15_18-28-41
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -186608.0089799702
  episode_reward_mean: -190508.9317946244
  episode_reward_min: -197226.60882604675
  episodes_this_iter: 60
  episodes_total: 17268
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1085751056671143
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005310744745656848
          model: {}
          policy_loss: 0.007570182904601097
          total_loss: 10.007365226745605
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,270,2566.3,17275680,-190509,-186608,-197227,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17339664
  counters:
    num_agent_steps_sampled: 17339664
    num_agent_steps_trained: 17339664
    num_env_steps_sampled: 17339664
    num_env_steps_trained: 17339664
  custom_metrics: {}
  date: 2022-10-15_18-28-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187792.62821741228
  episode_reward_mean: -190746.83532953187
  episode_reward_min: -195409.21500153767
  episodes_this_iter: 60
  episodes_total: 17328
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.105029344558716
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0009713497129268944
          model: {}
          policy_loss: -0.002143857069313526
          total_loss: 9.997739791870117
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,271,2575.6,17339664,-190747,-187793,-195409,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17403648
  counters:
    num_agent_steps_sampled: 17403648
    num_agent_steps_trained: 17403648
    num_env_steps_sampled: 17403648
    num_env_steps_trained: 17403648
  custom_metrics: {}
  date: 2022-10-15_18-29-00
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188479.53112677098
  episode_reward_mean: -190942.99225562255
  episode_reward_min: -194712.9805543339
  episodes_this_iter: 72
  episodes_total: 17400
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.103708028793335
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005875796196050942
          model: {}
          policy_loss: 0.005207995884120464
          total_loss: 10.005013465881348
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,272,2585.12,17403648,-190943,-188480,-194713,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17467632
  counters:
    num_agent_steps_sampled: 17467632
    num_agent_steps_trained: 17467632
    num_env_steps_sampled: 17467632
    num_env_steps_trained: 17467632
  custom_metrics: {}
  date: 2022-10-15_18-29-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188783.08575760448
  episode_reward_mean: -191017.53671293083
  episode_reward_min: -194712.9805543339
  episodes_this_iter: 60
  episodes_total: 17460
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.100745916366577
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008187902858480811
          model: {}
          policy_loss: 0.007546423934400082
          total_loss: 10.007401466369629
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,273,2594.35,17467632,-191018,-188783,-194713,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17531616
  counters:
    num_agent_steps_sampled: 17531616
    num_agent_steps_trained: 17531616
    num_env_steps_sampled: 17531616
    num_env_steps_trained: 17531616
  custom_metrics: {}
  date: 2022-10-15_18-29-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188086.15994701872
  episode_reward_mean: -190953.3909496395
  episode_reward_min: -193610.4192624042
  episodes_this_iter: 60
  episodes_total: 17520
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.102976083755493
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007961284136399627
          model: {}
          policy_loss: -0.0018833065405488014
          total_loss: 9.997965812683105
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,274,2603.3,17531616,-190953,-188086,-193610,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17595600
  counters:
    num_agent_steps_sampled: 17595600
    num_agent_steps_trained: 17595600
    num_env_steps_sampled: 17595600
    num_env_steps_trained: 17595600
  custom_metrics: {}
  date: 2022-10-15_18-29-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187812.64897462627
  episode_reward_mean: -190880.98985500316
  episode_reward_min: -196960.7974974207
  episodes_this_iter: 72
  episodes_total: 17592
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1051998138427734
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0010022877249866724
          model: {}
          policy_loss: 0.005028248764574528
          total_loss: 10.004919052124023
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,275,2612.33,17595600,-190881,-187813,-196961,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17659584
  counters:
    num_agent_steps_sampled: 17659584
    num_agent_steps_trained: 17659584
    num_env_steps_sampled: 17659584
    num_env_steps_trained: 17659584
  custom_metrics: {}
  date: 2022-10-15_18-29-36
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187812.64897462627
  episode_reward_mean: -190537.29105994254
  episode_reward_min: -193409.4597643117
  episodes_this_iter: 60
  episodes_total: 17652
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.107006549835205
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0007081144722178578
          model: {}
          policy_loss: 0.006911231204867363
          total_loss: 10.006742477416992
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,276,2621.49,17659584,-190537,-187813,-193409,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17723568
  counters:
    num_agent_steps_sampled: 17723568
    num_agent_steps_trained: 17723568
    num_env_steps_sampled: 17723568
    num_env_steps_trained: 17723568
  custom_metrics: {}
  date: 2022-10-15_18-29-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187550.70043550374
  episode_reward_mean: -190221.14319918252
  episode_reward_min: -193520.36699446032
  episodes_this_iter: 60
  episodes_total: 17712
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.109989643096924
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006558825843967497
          model: {}
          policy_loss: -0.0016094360034912825
          total_loss: 9.998211860656738
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,277,2630.74,17723568,-190221,-187551,-193520,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17787552
  counters:
    num_agent_steps_sampled: 17787552
    num_agent_steps_trained: 17787552
    num_env_steps_sampled: 17787552
    num_env_steps_trained: 17787552
  custom_metrics: {}
  date: 2022-10-15_18-29-55
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187550.70043550374
  episode_reward_mean: -190258.85466934115
  episode_reward_min: -193854.63117036398
  episodes_this_iter: 72
  episodes_total: 17784
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.10483717918396
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001078386907465756
          model: {}
          policy_loss: 0.00463333772495389
          total_loss: 10.004537582397461
          vf_expla

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,278,2640.5,17787552,-190259,-187551,-193855,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17851536
  counters:
    num_agent_steps_sampled: 17851536
    num_agent_steps_trained: 17851536
    num_env_steps_sampled: 17851536
    num_env_steps_trained: 17851536
  custom_metrics: {}
  date: 2022-10-15_18-30-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187946.0234414324
  episode_reward_mean: -190420.4017303911
  episode_reward_min: -198084.70113748458
  episodes_this_iter: 60
  episodes_total: 17844
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.103043794631958
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0009228751878254116
          model: {}
          policy_loss: 0.006628234405070543
          total_loss: 10.006502151489258
          vf_expl

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,279,2649.96,17851536,-190420,-187946,-198085,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17915520
  counters:
    num_agent_steps_sampled: 17915520
    num_agent_steps_trained: 17915520
    num_env_steps_sampled: 17915520
    num_env_steps_trained: 17915520
  custom_metrics: {}
  date: 2022-10-15_18-30-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -186216.06625641987
  episode_reward_mean: -190458.6383742319
  episode_reward_min: -198084.70113748458
  episodes_this_iter: 60
  episodes_total: 17904
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1049790382385254
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0010849705431610346
          model: {}
          policy_loss: -0.0014879693044349551
          total_loss: 9.998418807983398
          vf_e

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,280,2659.85,17915520,-190459,-186216,-198085,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 17979504
  counters:
    num_agent_steps_sampled: 17979504
    num_agent_steps_trained: 17979504
    num_env_steps_sampled: 17979504
    num_env_steps_trained: 17979504
  custom_metrics: {}
  date: 2022-10-15_18-30-24
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -186216.06625641987
  episode_reward_mean: -190443.3978171198
  episode_reward_min: -195000.95366231626
  episodes_this_iter: 72
  episodes_total: 17976
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.109107255935669
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0008163059246726334
          model: {}
          policy_loss: 0.004658929537981749
          total_loss: 10.004510879516602
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,281,2669.5,17979504,-190443,-186216,-195001,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 18043488
  counters:
    num_agent_steps_sampled: 18043488
    num_agent_steps_trained: 18043488
    num_env_steps_sampled: 18043488
    num_env_steps_trained: 18043488
  custom_metrics: {}
  date: 2022-10-15_18-30-34
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187791.5856851314
  episode_reward_mean: -191044.81886799642
  episode_reward_min: -195000.95366231626
  episodes_this_iter: 60
  episodes_total: 18036
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.109548568725586
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0010908659314736724
          model: {}
          policy_loss: 0.006644752807915211
          total_loss: 10.006551742553711
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,282,2679.45,18043488,-191045,-187792,-195001,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,282,2679.45,18043488,-191045,-187792,-195001,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 18107472
  counters:
    num_agent_steps_sampled: 18107472
    num_agent_steps_trained: 18107472
    num_env_steps_sampled: 18107472
    num_env_steps_trained: 18107472
  custom_metrics: {}
  date: 2022-10-15_18-30-44
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187238.3418040868
  episode_reward_mean: -191204.14101381126
  episode_reward_min: -195155.0845863614
  episodes_this_iter: 60
  episodes_total: 18096
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.107776403427124
          entropy_coeff: 9.999999747378752e-05
          kl: 0.000678788055665791
          model: {}
          policy_loss: -0.0009947724174708128
          total_loss: 9.99882984161377
          vf_expla

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,283,2689.41,18107472,-191204,-187238,-195155,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 18171456
  counters:
    num_agent_steps_sampled: 18171456
    num_agent_steps_trained: 18171456
    num_env_steps_sampled: 18171456
    num_env_steps_trained: 18171456
  custom_metrics: {}
  date: 2022-10-15_18-30-54
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187238.3418040868
  episode_reward_mean: -191016.98842234493
  episode_reward_min: -202238.2891679721
  episodes_this_iter: 72
  episodes_total: 18168
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1050024032592773
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00034358820994384587
          model: {}
          policy_loss: 0.005003764294087887
          total_loss: 10.004762649536133
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,284,2699.02,18171456,-191017,-187238,-202238,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 18235440
  counters:
    num_agent_steps_sampled: 18235440
    num_agent_steps_trained: 18235440
    num_env_steps_sampled: 18235440
    num_env_steps_trained: 18235440
  custom_metrics: {}
  date: 2022-10-15_18-31-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187256.28434776116
  episode_reward_mean: -190892.93632320157
  episode_reward_min: -198537.5711022306
  episodes_this_iter: 60
  episodes_total: 18228
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.102843999862671
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006825228338129818
          model: {}
          policy_loss: 0.007173463236540556
          total_loss: 10.007000923156738
          vf_exp

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,285,2708.46,18235440,-190893,-187256,-198538,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 18299424
  counters:
    num_agent_steps_sampled: 18299424
    num_agent_steps_trained: 18299424
    num_env_steps_sampled: 18299424
    num_env_steps_trained: 18299424
  custom_metrics: {}
  date: 2022-10-15_18-31-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187256.28434776116
  episode_reward_mean: -190740.00314073593
  episode_reward_min: -201771.0343365065
  episodes_this_iter: 60
  episodes_total: 18288
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0986595153808594
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0005480574327521026
          model: {}
          policy_loss: -0.0015350134344771504
          total_loss: 9.99826431274414
          vf_ex

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,286,2718.44,18299424,-190740,-187256,-201771,1000


Result for PPOTrainer_IBGym-v1_62e25_00000:
  agent_timesteps_total: 18363408
  counters:
    num_agent_steps_sampled: 18363408
    num_agent_steps_trained: 18363408
    num_env_steps_sampled: 18363408
    num_env_steps_trained: 18363408
  custom_metrics: {}
  date: 2022-10-15_18-31-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -186405.0224335903
  episode_reward_mean: -190814.40596034896
  episode_reward_min: -205528.119498394
  episodes_this_iter: 72
  episodes_total: 18360
  experiment_id: 416915bca0d6482fa48dbfee58d8681b
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0971672534942627
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0006344734574668109
          model: {}
          policy_loss: 0.005250617396086454
          total_loss: 10.00506591796875
          vf_expla

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_62e25_00000,RUNNING,192.168.0.185:18350,287,2728.1,18363408,-190814,-186405,-205528,1000
