In [None]:
import ray.tune as tune
from ray.rllib.agents.ppo import PPOTrainer
from ray.tune import register_env
from envs.env_creator import env_creator, ibgym_env_creator_rllib
from envs.IBGym_mod_envs import IBGymModded
from ppo.policy import LSTMPPOPolicy


## Configure Training

In [None]:


# Register the environment creator under the id "IBGym-v1", the same id that
# `config["env"]` refers to below.
register_env("IBGym-v1", ibgym_env_creator_rllib)

# PPO trainer configuration for the "IBGym-v1" environment.
# Keys are listed in the order they end up in the resulting dict.
config = dict(
    # Environment id; registered with `register_env` earlier in this cell.
    env="IBGym-v1",
    # Number of environment workers (aka "rollout workers") that collect
    # samples in parallel, each from its own environment clone(s).
    num_workers=12,
    num_gpus=1,

    # Deep-learning backend. Use "torch" for PyTorch, or "tf2" for tf2.x
    # eager execution.
    framework="tf",
    entropy_coeff=0.0001,
    # Disabled alternative: schedule the entropy coefficient instead of
    # keeping it fixed.
    # entropy_coeff_schedule=PiecewiseSchedule(endpoints=[(0, 0.01), (143000, 0.00001)]),
    lr=3e-5,
    gamma=0.994,
    clip_param=0.2,
    seed=5321,
    num_sgd_iter=2,
    sgd_minibatch_size=1000,
    # Disabled alternative minibatch size:
    # sgd_minibatch_size=1024,

    # Disabled value-function loss overrides, kept for reference.
    # NOTE(review): the run output logged in this notebook reports total_loss
    # pinned near 10.0 while policy_loss stays near 0, which looks like the
    # value loss saturating at a clip limit — presumably the default
    # vf_clip_param. Confirm, and consider a larger clip given episode
    # rewards on the order of -2e5.
    # vf_loss_coeff=1e-9,
    # vf_clip_param=1e7,

    # Overrides for the default model that RLlib builds from the
    # environment's observation and action spaces.
    model=dict(
        # == LSTM ==
        # Wrap the model with an LSTM.
        use_lstm=True,
        # Maximum sequence length used when training the LSTM.
        max_seq_len=30,
        # Size of the LSTM cell.
        lstm_cell_size=64,
        # Disabled alternatives (attention wrapper, shared value layers,
        # custom fully-connected sizes):
        # use_attention=True,
        # attention_num_transformer_units=2,
        # attention_dim=128,
        # vf_share_layers=True,
        # fcnet_hiddens=[32, 32, 32],
        vf_share_layers=False,
        # Do not feed a_{t-1} to the LSTM (would be one-hot encoded if
        # discrete).
        lstm_use_prev_action=False,
        # Do not feed r_{t-1} to the LSTM.
        lstm_use_prev_reward=False,
        # Batch-major (BxTx..) rather than time-major (TxBx..) LSTM input.
        _time_major=False,
    ),
    train_batch_size=32000,
    # NOTE(review): the logged `ts` counter grows by 63,984 per iteration,
    # roughly twice this value — confirm that is the intended iteration size.
    timesteps_per_iteration=32000,
    # Disabled: write sampled experiences to disk.
    # output="tmp/ib-out",

    # Separate worker set reserved for evaluation.
    evaluation_num_workers=3,
    # Settings applied only to evaluation runs: do not render the env.
    evaluation_config=dict(render_env=False),
)

In [3]:
# Launch the PPO experiment through Tune and keep the call's return value in
# `results`. Trial output goes under "tmp/ray_exp_logs/industrial_benchmark".
# NOTE(review): the stop criterion below is commented out and the logged trial
# is still RUNNING at iteration 32, so presumably this call only returns when
# it is interrupted — confirm that is intended before a Run All.
results = tune.run(
    run_or_experiment=PPOTrainer,
    name="industrial_benchmark",
    config=config,
    local_dir="tmp/ray_exp_logs",
    # Checkpoint every 5 training iterations.
    checkpoint_freq=5,
    # Disable syncing.
    sync_config=tune.SyncConfig(syncer=None),
    # stop={"training_iteration": 5},
)

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,13,206.826,831792,-230604,-212036,-263445,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 895776
  counters:
    num_agent_steps_sampled: 895776
    num_agent_steps_trained: 895776
    num_env_steps_sampled: 895776
    num_env_steps_trained: 895776
  custom_metrics: {}
  date: 2022-10-14_12-13-56
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -212035.63694766775
  episode_reward_mean: -230745.94534207933
  episode_reward_min: -303472.80645493016
  episodes_this_iter: 60
  episodes_total: 888
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1898248195648193
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0020165974274277687
          model: {}
          policy_loss: -0.0016078799962997437
          total_loss: 9.998476028442383
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,14,222.333,895776,-230746,-212036,-303473,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,14,222.333,895776,-230746,-212036,-303473,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,14,222.333,895776,-230746,-212036,-303473,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 959760
  counters:
    num_agent_steps_sampled: 959760
    num_agent_steps_trained: 959760
    num_env_steps_sampled: 959760
    num_env_steps_trained: 959760
  custom_metrics: {}
  date: 2022-10-14_12-14-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -213896.81338503739
  episode_reward_mean: -230412.48026239523
  episode_reward_min: -303472.80645493016
  episodes_this_iter: 60
  episodes_total: 948
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1853694915771484
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0021077217534184456
          model: {}
          policy_loss: 0.00022972095757722855
          total_loss: 10.00033187866211
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,15,238.201,959760,-230412,-213897,-303473,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,15,238.201,959760,-230412,-213897,-303473,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,15,238.201,959760,-230412,-213897,-303473,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1023744
  counters:
    num_agent_steps_sampled: 1023744
    num_agent_steps_trained: 1023744
    num_env_steps_sampled: 1023744
    num_env_steps_trained: 1023744
  custom_metrics: {}
  date: 2022-10-14_12-14-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -211455.83614863481
  episode_reward_mean: -228762.98114044423
  episode_reward_min: -295979.8174079158
  episodes_this_iter: 72
  episodes_total: 1020
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.189800977706909
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00188849784899503
          model: {}
          policy_loss: 0.005997458007186651
          total_loss: 10.00605583190918
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,16,253.552,1023740.0,-228763,-211456,-295980,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,16,253.552,1023740.0,-228763,-211456,-295980,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,16,253.552,1023740.0,-228763,-211456,-295980,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1087728
  counters:
    num_agent_steps_sampled: 1087728
    num_agent_steps_trained: 1087728
    num_env_steps_sampled: 1087728
    num_env_steps_trained: 1087728
  custom_metrics: {}
  date: 2022-10-14_12-14-44
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -211455.83614863481
  episode_reward_mean: -227065.8753183654
  episode_reward_min: -290837.330195613
  episodes_this_iter: 60
  episodes_total: 1080
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.186768054962158
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0020511276088654995
          model: {}
          policy_loss: 0.005901666358113289
          total_loss: 10.005992889404297
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,17,270.06,1087730.0,-227066,-211456,-290837,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,17,270.06,1087730.0,-227066,-211456,-290837,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,17,270.06,1087730.0,-227066,-211456,-290837,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1151712
  counters:
    num_agent_steps_sampled: 1151712
    num_agent_steps_trained: 1151712
    num_env_steps_sampled: 1151712
    num_env_steps_trained: 1151712
  custom_metrics: {}
  date: 2022-10-14_12-15-00
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -211427.20345332028
  episode_reward_mean: -225834.4701837723
  episode_reward_min: -265924.86951028917
  episodes_this_iter: 60
  episodes_total: 1140
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.190392255783081
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001882149837911129
          model: {}
          policy_loss: 0.0011186019983142614
          total_loss: 10.001174926757812
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,18,286.428,1151710.0,-225834,-211427,-265925,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,18,286.428,1151710.0,-225834,-211427,-265925,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,18,286.428,1151710.0,-225834,-211427,-265925,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1215696
  counters:
    num_agent_steps_sampled: 1215696
    num_agent_steps_trained: 1215696
    num_env_steps_sampled: 1215696
    num_env_steps_trained: 1215696
  custom_metrics: {}
  date: 2022-10-14_12-15-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -210637.02773507687
  episode_reward_mean: -223148.32382704868
  episode_reward_min: -262837.37349418446
  episodes_this_iter: 72
  episodes_total: 1212
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.197052478790283
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0012963061453774571
          model: {}
          policy_loss: 0.006734847091138363
          total_loss: 10.006674766540527
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,19,301.524,1215700.0,-223148,-210637,-262837,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,19,301.524,1215700.0,-223148,-210637,-262837,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,19,301.524,1215700.0,-223148,-210637,-262837,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1279680
  counters:
    num_agent_steps_sampled: 1279680
    num_agent_steps_trained: 1279680
    num_env_steps_sampled: 1279680
    num_env_steps_trained: 1279680
  custom_metrics: {}
  date: 2022-10-14_12-15-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -206864.16603588185
  episode_reward_mean: -222091.1951210536
  episode_reward_min: -253130.6172528566
  episodes_this_iter: 60
  episodes_total: 1272
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.2016568183898926
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019647504668682814
          model: {}
          policy_loss: 0.003713137935847044
          total_loss: 10.003786087036133
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,20,316.942,1279680.0,-222091,-206864,-253131,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,20,316.942,1279680.0,-222091,-206864,-253131,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,20,316.942,1279680.0,-222091,-206864,-253131,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1343664
  counters:
    num_agent_steps_sampled: 1343664
    num_agent_steps_trained: 1343664
    num_env_steps_sampled: 1343664
    num_env_steps_trained: 1343664
  custom_metrics: {}
  date: 2022-10-14_12-15-46
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -206904.46788823238
  episode_reward_mean: -220359.1268907074
  episode_reward_min: -253130.6172528566
  episodes_this_iter: 60
  episodes_total: 1332
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.191218852996826
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001985028851777315
          model: {}
          policy_loss: 0.00015871570212766528
          total_loss: 10.000236511230469
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,21,332.217,1343660.0,-220359,-206904,-253131,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,21,332.217,1343660.0,-220359,-206904,-253131,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,21,332.217,1343660.0,-220359,-206904,-253131,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1407648
  counters:
    num_agent_steps_sampled: 1407648
    num_agent_steps_trained: 1407648
    num_env_steps_sampled: 1407648
    num_env_steps_trained: 1407648
  custom_metrics: {}
  date: 2022-10-14_12-16-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -208280.77079080415
  episode_reward_mean: -217911.39226143697
  episode_reward_min: -233746.30378946685
  episodes_this_iter: 72
  episodes_total: 1404
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1845169067382812
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017836472252383828
          model: {}
          policy_loss: 0.003890256630256772
          total_loss: 10.003929138183594
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,22,348.034,1407650.0,-217911,-208281,-233746,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,22,348.034,1407650.0,-217911,-208281,-233746,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,22,348.034,1407650.0,-217911,-208281,-233746,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1471632
  counters:
    num_agent_steps_sampled: 1471632
    num_agent_steps_trained: 1471632
    num_env_steps_sampled: 1471632
    num_env_steps_trained: 1471632
  custom_metrics: {}
  date: 2022-10-14_12-16-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203584.0717492047
  episode_reward_mean: -218907.16813429154
  episode_reward_min: -268617.8982193841
  episodes_this_iter: 60
  episodes_total: 1464
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.181027889251709
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001777302473783493
          model: {}
          policy_loss: 0.0027620696928352118
          total_loss: 10.002799034118652
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,23,363.718,1471630.0,-218907,-203584,-268618,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,23,363.718,1471630.0,-218907,-203584,-268618,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1535616
  counters:
    num_agent_steps_sampled: 1535616
    num_agent_steps_trained: 1535616
    num_env_steps_sampled: 1535616
    num_env_steps_trained: 1535616
  custom_metrics: {}
  date: 2022-10-14_12-16-33
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203483.51135324533
  episode_reward_mean: -217412.39564396403
  episode_reward_min: -266031.71247158945
  episodes_this_iter: 60
  episodes_total: 1524
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1794049739837646
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017912257462739944
          model: {}
          policy_loss: -0.0065317098051309586
          total_loss: 9.993507385253906
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,24,378.583,1535620.0,-217412,-203484,-266032,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,24,378.583,1535620.0,-217412,-203484,-266032,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,24,378.583,1535620.0,-217412,-203484,-266032,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1599600
  counters:
    num_agent_steps_sampled: 1599600
    num_agent_steps_trained: 1599600
    num_env_steps_sampled: 1599600
    num_env_steps_trained: 1599600
  custom_metrics: {}
  date: 2022-10-14_12-16-48
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203483.51135324533
  episode_reward_mean: -215152.4350974754
  episode_reward_min: -226700.10240544818
  episodes_this_iter: 72
  episodes_total: 1596
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1799964904785156
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002160376636311412
          model: {}
          policy_loss: 0.0023466835264116526
          total_loss: 10.002461433410645
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,25,393.907,1599600.0,-215152,-203484,-226700,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,25,393.907,1599600.0,-215152,-203484,-226700,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,25,393.907,1599600.0,-215152,-203484,-226700,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1663584
  counters:
    num_agent_steps_sampled: 1663584
    num_agent_steps_trained: 1663584
    num_env_steps_sampled: 1663584
    num_env_steps_trained: 1663584
  custom_metrics: {}
  date: 2022-10-14_12-17-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -204604.37498847177
  episode_reward_mean: -213916.82170201716
  episode_reward_min: -226295.83599401484
  episodes_this_iter: 60
  episodes_total: 1656
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.175508499145508
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0018832115456461906
          model: {}
          policy_loss: 0.005672579165548086
          total_loss: 10.005730628967285
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,26,408.978,1663580.0,-213917,-204604,-226296,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,26,408.978,1663580.0,-213917,-204604,-226296,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,26,408.978,1663580.0,-213917,-204604,-226296,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1727568
  counters:
    num_agent_steps_sampled: 1727568
    num_agent_steps_trained: 1727568
    num_env_steps_sampled: 1727568
    num_env_steps_trained: 1727568
  custom_metrics: {}
  date: 2022-10-14_12-17-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203508.7106639694
  episode_reward_mean: -216235.70762756775
  episode_reward_min: -288091.98887724103
  episodes_this_iter: 60
  episodes_total: 1716
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.166250228881836
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0023053523618727922
          model: {}
          policy_loss: 0.0020070646423846483
          total_loss: 10.002152442932129
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,27,424.31,1727570.0,-216236,-203509,-288092,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,27,424.31,1727570.0,-216236,-203509,-288092,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,27,424.31,1727570.0,-216236,-203509,-288092,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1791552
  counters:
    num_agent_steps_sampled: 1791552
    num_agent_steps_trained: 1791552
    num_env_steps_sampled: 1791552
    num_env_steps_trained: 1791552
  custom_metrics: {}
  date: 2022-10-14_12-17-34
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203508.7106639694
  episode_reward_mean: -216603.72533681185
  episode_reward_min: -288091.98887724103
  episodes_this_iter: 72
  episodes_total: 1788
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.160926342010498
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0024710926227271557
          model: {}
          policy_loss: 0.00858118012547493
          total_loss: 10.008758544921875
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,28,439.765,1791550.0,-216604,-203509,-288092,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,28,439.765,1791550.0,-216604,-203509,-288092,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,28,439.765,1791550.0,-216604,-203509,-288092,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1855536
  counters:
    num_agent_steps_sampled: 1855536
    num_agent_steps_trained: 1855536
    num_env_steps_sampled: 1855536
    num_env_steps_trained: 1855536
  custom_metrics: {}
  date: 2022-10-14_12-17-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -204964.31745586454
  episode_reward_mean: -216106.61426056348
  episode_reward_min: -273775.92469949025
  episodes_this_iter: 60
  episodes_total: 1848
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1628544330596924
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0020686069037765265
          model: {}
          policy_loss: 0.004060119390487671
          total_loss: 10.004158020019531
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,29,456.012,1855540.0,-216107,-204964,-273776,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,29,456.012,1855540.0,-216107,-204964,-273776,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,29,456.012,1855540.0,-216107,-204964,-273776,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1919520
  counters:
    num_agent_steps_sampled: 1919520
    num_agent_steps_trained: 1919520
    num_env_steps_sampled: 1919520
    num_env_steps_trained: 1919520
  custom_metrics: {}
  date: 2022-10-14_12-18-05
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -204367.46261826897
  episode_reward_mean: -214423.18969880533
  episode_reward_min: -265787.0462484535
  episodes_this_iter: 60
  episodes_total: 1908
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1571688652038574
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0020385703537613153
          model: {}
          policy_loss: 0.001669418066740036
          total_loss: 10.001760482788086
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,30,471.251,1919520.0,-214423,-204367,-265787,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,30,471.251,1919520.0,-214423,-204367,-265787,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 1983504
  counters:
    num_agent_steps_sampled: 1983504
    num_agent_steps_trained: 1983504
    num_env_steps_sampled: 1983504
    num_env_steps_trained: 1983504
  custom_metrics: {}
  date: 2022-10-14_12-18-21
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205447.43054673067
  episode_reward_mean: -213882.716355698
  episode_reward_min: -260976.56690828517
  episodes_this_iter: 72
  episodes_total: 1980
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.157827377319336
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0022739574778825045
          model: {}
          policy_loss: 0.006677048280835152
          total_loss: 10.006815910339355
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,31,486.564,1983500.0,-213883,-205447,-260977,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,31,486.564,1983500.0,-213883,-205447,-260977,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,31,486.564,1983500.0,-213883,-205447,-260977,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2047488
  counters:
    num_agent_steps_sampled: 2047488
    num_agent_steps_trained: 2047488
    num_env_steps_sampled: 2047488
    num_env_steps_trained: 2047488
  custom_metrics: {}
  date: 2022-10-14_12-18-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205447.43054673067
  episode_reward_mean: -214549.1910200332
  episode_reward_min: -260976.56690828517
  episodes_this_iter: 60
  episodes_total: 2040
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1592791080474854
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017458065412938595
          model: {}
          policy_loss: 0.006505824625492096
          total_loss: 10.006538391113281
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,32,503.098,2047490.0,-214549,-205447,-260977,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,32,503.098,2047490.0,-214549,-205447,-260977,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,32,503.098,2047490.0,-214549,-205447,-260977,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2111472
  counters:
    num_agent_steps_sampled: 2111472
    num_agent_steps_trained: 2111472
    num_env_steps_sampled: 2111472
    num_env_steps_trained: 2111472
  custom_metrics: {}
  date: 2022-10-14_12-18-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205979.76762272144
  episode_reward_mean: -215278.62090900243
  episode_reward_min: -237251.03903564156
  episodes_this_iter: 60
  episodes_total: 2100
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1565890312194824
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0018260887591168284
          model: {}
          policy_loss: 8.033215999603271e-05
          total_loss: 10.000129699707031
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,33,518.377,2111470.0,-215279,-205980,-237251,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,33,518.377,2111470.0,-215279,-205980,-237251,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,33,518.377,2111470.0,-215279,-205980,-237251,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2175456
  counters:
    num_agent_steps_sampled: 2175456
    num_agent_steps_trained: 2175456
    num_env_steps_sampled: 2175456
    num_env_steps_trained: 2175456
  custom_metrics: {}
  date: 2022-10-14_12-19-08
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205979.540833765
  episode_reward_mean: -215019.55175908087
  episode_reward_min: -232571.28804571196
  episodes_this_iter: 72
  episodes_total: 2172
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.156897783279419
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017313743010163307
          model: {}
          policy_loss: 0.00723530538380146
          total_loss: 10.0072660446167
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,34,534.063,2175460.0,-215020,-205980,-232571,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,34,534.063,2175460.0,-215020,-205980,-232571,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2239440
  counters:
    num_agent_steps_sampled: 2239440
    num_agent_steps_trained: 2239440
    num_env_steps_sampled: 2239440
    num_env_steps_trained: 2239440
  custom_metrics: {}
  date: 2022-10-14_12-19-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205630.09020700658
  episode_reward_mean: -214334.83340572123
  episode_reward_min: -226271.59060176468
  episodes_this_iter: 60
  episodes_total: 2232
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.156545877456665
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0018354773055762053
          model: {}
          policy_loss: 0.00602150522172451
          total_loss: 10.006072998046875
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,35,548.896,2239440.0,-214335,-205630,-226272,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,35,548.896,2239440.0,-214335,-205630,-226272,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2303424
  counters:
    num_agent_steps_sampled: 2303424
    num_agent_steps_trained: 2303424
    num_env_steps_sampled: 2303424
    num_env_steps_trained: 2303424
  custom_metrics: {}
  date: 2022-10-14_12-19-38
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -205477.26945502078
  episode_reward_mean: -213228.92035652723
  episode_reward_min: -224884.74597870518
  episodes_this_iter: 60
  episodes_total: 2292
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1513824462890625
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001969194272533059
          model: {}
          policy_loss: -0.0005196034908294678
          total_loss: 9.999558448791504
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,36,564.053,2303420.0,-213229,-205477,-224885,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,36,564.053,2303420.0,-213229,-205477,-224885,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2367408
  counters:
    num_agent_steps_sampled: 2367408
    num_agent_steps_trained: 2367408
    num_env_steps_sampled: 2367408
    num_env_steps_trained: 2367408
  custom_metrics: {}
  date: 2022-10-14_12-19-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -204282.02535206903
  episode_reward_mean: -213243.30789859593
  episode_reward_min: -224305.92978500525
  episodes_this_iter: 72
  episodes_total: 2364
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1467208862304688
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019365899497643113
          model: {}
          policy_loss: 0.007180704269558191
          total_loss: 10.00725269317627
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,37,578.851,2367410.0,-213243,-204282,-224306,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,37,578.851,2367410.0,-213243,-204282,-224306,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2431392
  counters:
    num_agent_steps_sampled: 2431392
    num_agent_steps_trained: 2431392
    num_env_steps_sampled: 2431392
    num_env_steps_trained: 2431392
  custom_metrics: {}
  date: 2022-10-14_12-20-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203027.07746725224
  episode_reward_mean: -213516.52507259604
  episode_reward_min: -242226.89677700918
  episodes_this_iter: 60
  episodes_total: 2424
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.140758991241455
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019217245280742645
          model: {}
          policy_loss: 0.0057186828926205635
          total_loss: 10.005788803100586
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,38,594.393,2431390.0,-213517,-203027,-242227,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,38,594.393,2431390.0,-213517,-203027,-242227,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,38,594.393,2431390.0,-213517,-203027,-242227,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2495376
  counters:
    num_agent_steps_sampled: 2495376
    num_agent_steps_trained: 2495376
    num_env_steps_sampled: 2495376
    num_env_steps_trained: 2495376
  custom_metrics: {}
  date: 2022-10-14_12-20-24
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203027.07746725224
  episode_reward_mean: -212835.43683733037
  episode_reward_min: -246035.45101386987
  episodes_this_iter: 60
  episodes_total: 2484
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1340737342834473
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017944342689588666
          model: {}
          policy_loss: 0.0023200977593660355
          total_loss: 10.00236701965332
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,39,609.437,2495380.0,-212835,-203027,-246035,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,39,609.437,2495380.0,-212835,-203027,-246035,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,39,609.437,2495380.0,-212835,-203027,-246035,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2559360
  counters:
    num_agent_steps_sampled: 2559360
    num_agent_steps_trained: 2559360
    num_env_steps_sampled: 2559360
    num_env_steps_trained: 2559360
  custom_metrics: {}
  date: 2022-10-14_12-20-40
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203507.15361726045
  episode_reward_mean: -211730.04631479405
  episode_reward_min: -255113.14353589775
  episodes_this_iter: 72
  episodes_total: 2556
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.133845329284668
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0018180700717493892
          model: {}
          policy_loss: 0.007808729540556669
          total_loss: 10.007858276367188
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,40,625.405,2559360.0,-211730,-203507,-255113,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,40,625.405,2559360.0,-211730,-203507,-255113,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,40,625.405,2559360.0,-211730,-203507,-255113,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2623344
  counters:
    num_agent_steps_sampled: 2623344
    num_agent_steps_trained: 2623344
    num_env_steps_sampled: 2623344
    num_env_steps_trained: 2623344
  custom_metrics: {}
  date: 2022-10-14_12-20-56
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -201143.0543583375
  episode_reward_mean: -210243.3114715463
  episode_reward_min: -251380.2225740846
  episodes_this_iter: 60
  episodes_total: 2616
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.125153064727783
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0020269020460546017
          model: {}
          policy_loss: 0.0033439937978982925
          total_loss: 10.003437042236328
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,41,641.189,2623340.0,-210243,-201143,-251380,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,41,641.189,2623340.0,-210243,-201143,-251380,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2687328
  counters:
    num_agent_steps_sampled: 2687328
    num_agent_steps_trained: 2687328
    num_env_steps_sampled: 2687328
    num_env_steps_trained: 2687328
  custom_metrics: {}
  date: 2022-10-14_12-21-11
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -201143.0543583375
  episode_reward_mean: -209090.70772496102
  episode_reward_min: -218771.31384616307
  episodes_this_iter: 60
  episodes_total: 2676
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1268796920776367
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0014725906075909734
          model: {}
          policy_loss: 0.00046566128730773926
          total_loss: 10.000447273254395
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,42,656.093,2687330.0,-209091,-201143,-218771,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,42,656.093,2687330.0,-209091,-201143,-218771,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,42,656.093,2687330.0,-209091,-201143,-218771,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2751312
  counters:
    num_agent_steps_sampled: 2751312
    num_agent_steps_trained: 2751312
    num_env_steps_sampled: 2751312
    num_env_steps_trained: 2751312
  custom_metrics: {}
  date: 2022-10-14_12-21-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -201849.366158543
  episode_reward_mean: -210561.16480445326
  episode_reward_min: -219109.27082521812
  episodes_this_iter: 72
  episodes_total: 2748
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.12695574760437
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0018894858658313751
          model: {}
          policy_loss: 0.006222189404070377
          total_loss: 10.006287574768066
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,43,671.894,2751310.0,-210561,-201849,-219109,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,43,671.894,2751310.0,-210561,-201849,-219109,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2815296
  counters:
    num_agent_steps_sampled: 2815296
    num_agent_steps_trained: 2815296
    num_env_steps_sampled: 2815296
    num_env_steps_trained: 2815296
  custom_metrics: {}
  date: 2022-10-14_12-21-42
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203492.619766955
  episode_reward_mean: -211104.8466920097
  episode_reward_min: -273376.03640816786
  episodes_this_iter: 60
  episodes_total: 2808
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.124091863632202
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002389054512605071
          model: {}
          policy_loss: 0.002340897684916854
          total_loss: 10.002507209777832
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,44,686.917,2815300.0,-211105,-203493,-273376,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,44,686.917,2815300.0,-211105,-203493,-273376,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,44,686.917,2815300.0,-211105,-203493,-273376,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2879280
  counters:
    num_agent_steps_sampled: 2879280
    num_agent_steps_trained: 2879280
    num_env_steps_sampled: 2879280
    num_env_steps_trained: 2879280
  custom_metrics: {}
  date: 2022-10-14_12-21-58
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -201925.83368059693
  episode_reward_mean: -211954.36956953286
  episode_reward_min: -273376.03640816786
  episodes_this_iter: 60
  episodes_total: 2868
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.124141216278076
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0025546480901539326
          model: {}
          policy_loss: -0.00588981993496418
          total_loss: 9.994307518005371
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,45,703.11,2879280.0,-211954,-201926,-273376,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,45,703.11,2879280.0,-211954,-201926,-273376,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 2943264
  counters:
    num_agent_steps_sampled: 2943264
    num_agent_steps_trained: 2943264
    num_env_steps_sampled: 2943264
    num_env_steps_trained: 2943264
  custom_metrics: {}
  date: 2022-10-14_12-22-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203494.17482633176
  episode_reward_mean: -211341.66232782352
  episode_reward_min: -229291.99028035946
  episodes_this_iter: 72
  episodes_total: 2940
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1148719787597656
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001785535947419703
          model: {}
          policy_loss: 0.002856388920918107
          total_loss: 10.002902030944824
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,46,718.141,2943260.0,-211342,-203494,-229292,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,46,718.141,2943260.0,-211342,-203494,-229292,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,46,718.141,2943260.0,-211342,-203494,-229292,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3007248
  counters:
    num_agent_steps_sampled: 3007248
    num_agent_steps_trained: 3007248
    num_env_steps_sampled: 3007248
    num_env_steps_trained: 3007248
  custom_metrics: {}
  date: 2022-10-14_12-22-29
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -204621.5585590346
  episode_reward_mean: -210620.98052662
  episode_reward_min: -229291.99028035946
  episodes_this_iter: 60
  episodes_total: 3000
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.115812063217163
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0014630205696448684
          model: {}
          policy_loss: 0.005514125339686871
          total_loss: 10.005495071411133
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,47,734.512,3007250.0,-210621,-204622,-229292,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,47,734.512,3007250.0,-210621,-204622,-229292,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,47,734.512,3007250.0,-210621,-204622,-229292,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3071232
  counters:
    num_agent_steps_sampled: 3071232
    num_agent_steps_trained: 3071232
    num_env_steps_sampled: 3071232
    num_env_steps_trained: 3071232
  custom_metrics: {}
  date: 2022-10-14_12-22-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -204066.1226552671
  episode_reward_mean: -210108.98370469653
  episode_reward_min: -216883.67271508745
  episodes_this_iter: 60
  episodes_total: 3060
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1129753589630127
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0010660489788278937
          model: {}
          policy_loss: 0.0024476670660078526
          total_loss: 10.002349853515625
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,48,750.492,3071230.0,-210109,-204066,-216884,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,48,750.492,3071230.0,-210109,-204066,-216884,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3135216
  counters:
    num_agent_steps_sampled: 3135216
    num_agent_steps_trained: 3135216
    num_env_steps_sampled: 3135216
    num_env_steps_trained: 3135216
  custom_metrics: {}
  date: 2022-10-14_12-23-01
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -204058.18109268972
  episode_reward_mean: -210112.22566527434
  episode_reward_min: -218292.21040387842
  episodes_this_iter: 72
  episodes_total: 3132
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1119656562805176
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0018015984678640962
          model: {}
          policy_loss: 0.002420231467112899
          total_loss: 10.002470016479492
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,49,765.809,3135220.0,-210112,-204058,-218292,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,49,765.809,3135220.0,-210112,-204058,-218292,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3199200
  counters:
    num_agent_steps_sampled: 3199200
    num_agent_steps_trained: 3199200
    num_env_steps_sampled: 3199200
    num_env_steps_trained: 3199200
  custom_metrics: {}
  date: 2022-10-14_12-23-16
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -203181.4601174884
  episode_reward_mean: -210447.80910205952
  episode_reward_min: -267740.5793694569
  episodes_this_iter: 60
  episodes_total: 3192
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.1120998859405518
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019388669170439243
          model: {}
          policy_loss: 0.0021734798792749643
          total_loss: 10.002249717712402
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,50,781.578,3199200.0,-210448,-203181,-267741,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,50,781.578,3199200.0,-210448,-203181,-267741,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,50,781.578,3199200.0,-210448,-203181,-267741,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3263184
  counters:
    num_agent_steps_sampled: 3263184
    num_agent_steps_trained: 3263184
    num_env_steps_sampled: 3263184
    num_env_steps_trained: 3263184
  custom_metrics: {}
  date: 2022-10-14_12-23-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -201061.5664545836
  episode_reward_mean: -209158.51245016715
  episode_reward_min: -267740.5793694569
  episodes_this_iter: 60
  episodes_total: 3252
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.108823776245117
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002153043169528246
          model: {}
          policy_loss: 0.0018645982490852475
          total_loss: 10.001984596252441
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,51,796.416,3263180.0,-209159,-201062,-267741,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,51,796.416,3263180.0,-209159,-201062,-267741,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,51,796.416,3263180.0,-209159,-201062,-267741,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3327168
  counters:
    num_agent_steps_sampled: 3327168
    num_agent_steps_trained: 3327168
    num_env_steps_sampled: 3327168
    num_env_steps_trained: 3327168
  custom_metrics: {}
  date: 2022-10-14_12-23-47
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -201718.29990339532
  episode_reward_mean: -207736.6693367319
  episode_reward_min: -216350.84325706618
  episodes_this_iter: 72
  episodes_total: 3324
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.10085129737854
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002562887268140912
          model: {}
          policy_loss: 0.007424857467412949
          total_loss: 10.0076265335083
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,52,811.883,3327170.0,-207737,-201718,-216351,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,52,811.883,3327170.0,-207737,-201718,-216351,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3391152
  counters:
    num_agent_steps_sampled: 3391152
    num_agent_steps_trained: 3391152
    num_env_steps_sampled: 3391152
    num_env_steps_trained: 3391152
  custom_metrics: {}
  date: 2022-10-14_12-24-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -201416.86209345935
  episode_reward_mean: -207046.11940520146
  episode_reward_min: -216446.5624434
  episodes_this_iter: 60
  episodes_total: 3384
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0951685905456543
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002000845968723297
          model: {}
          policy_loss: 0.004241396673023701
          total_loss: 10.004331588745117
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,53,827.173,3391150.0,-207046,-201417,-216447,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,53,827.173,3391150.0,-207046,-201417,-216447,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,53,827.173,3391150.0,-207046,-201417,-216447,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,53,827.173,3391150.0,-207046,-201417,-216447,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3455136
  counters:
    num_agent_steps_sampled: 3455136
    num_agent_steps_trained: 3455136
    num_env_steps_sampled: 3455136
    num_env_steps_trained: 3455136
  custom_metrics: {}
  date: 2022-10-14_12-24-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199007.79897711374
  episode_reward_mean: -206777.48932434813
  episode_reward_min: -216446.5624434
  episodes_this_iter: 60
  episodes_total: 3444
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.094921588897705
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001932706218212843
          model: {}
          policy_loss: 0.002328774891793728
          total_loss: 10.002406120300293
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,54,843.042,3455140.0,-206777,-199008,-216447,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,54,843.042,3455140.0,-206777,-199008,-216447,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,54,843.042,3455140.0,-206777,-199008,-216447,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3519120
  counters:
    num_agent_steps_sampled: 3519120
    num_agent_steps_trained: 3519120
    num_env_steps_sampled: 3519120
    num_env_steps_trained: 3519120
  custom_metrics: {}
  date: 2022-10-14_12-24-34
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -200370.3557028901
  episode_reward_mean: -206148.95514783013
  episode_reward_min: -215159.09972463624
  episodes_this_iter: 72
  episodes_total: 3516
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.091977596282959
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017911981558427215
          model: {}
          policy_loss: 0.007390835788100958
          total_loss: 10.007440567016602
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,55,858.784,3519120.0,-206149,-200370,-215159,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,55,858.784,3519120.0,-206149,-200370,-215159,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3583104
  counters:
    num_agent_steps_sampled: 3583104
    num_agent_steps_trained: 3583104
    num_env_steps_sampled: 3583104
    num_env_steps_trained: 3583104
  custom_metrics: {}
  date: 2022-10-14_12-24-49
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198687.53782411714
  episode_reward_mean: -205302.62314265713
  episode_reward_min: -217748.37403604336
  episodes_this_iter: 60
  episodes_total: 3576
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0900039672851562
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0023225778713822365
          model: {}
          policy_loss: 0.0035094604827463627
          total_loss: 10.00366497039795
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,56,873.678,3583100.0,-205303,-198688,-217748,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,56,873.678,3583100.0,-205303,-198688,-217748,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3647088
  counters:
    num_agent_steps_sampled: 3647088
    num_agent_steps_trained: 3647088
    num_env_steps_sampled: 3647088
    num_env_steps_trained: 3647088
  custom_metrics: {}
  date: 2022-10-14_12-25-04
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198218.01886927022
  episode_reward_mean: -204950.5325652503
  episode_reward_min: -220303.23396581
  episodes_this_iter: 60
  episodes_total: 3636
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0916552543640137
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0020159841515123844
          model: {}
          policy_loss: 0.0012899013236165047
          total_loss: 10.001383781433105
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,57,888.448,3647090.0,-204951,-198218,-220303,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,57,888.448,3647090.0,-204951,-198218,-220303,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,57,888.448,3647090.0,-204951,-198218,-220303,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3711072
  counters:
    num_agent_steps_sampled: 3711072
    num_agent_steps_trained: 3711072
    num_env_steps_sampled: 3711072
    num_env_steps_trained: 3711072
  custom_metrics: {}
  date: 2022-10-14_12-25-19
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -199158.64226552387
  episode_reward_mean: -207411.1501650241
  episode_reward_min: -283769.5423456459
  episodes_this_iter: 72
  episodes_total: 3708
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0930819511413574
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017828620038926601
          model: {}
          policy_loss: 0.00714085204526782
          total_loss: 10.007187843322754
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,58,903.865,3711070.0,-207411,-199159,-283770,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,58,903.865,3711070.0,-207411,-199159,-283770,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,58,903.865,3711070.0,-207411,-199159,-283770,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3775056
  counters:
    num_agent_steps_sampled: 3775056
    num_agent_steps_trained: 3775056
    num_env_steps_sampled: 3775056
    num_env_steps_trained: 3775056
  custom_metrics: {}
  date: 2022-10-14_12-25-35
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197808.8583960744
  episode_reward_mean: -205675.7048134768
  episode_reward_min: -283227.4143429623
  episodes_this_iter: 60
  episodes_total: 3768
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.085527181625366
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0016014217399060726
          model: {}
          policy_loss: 0.0019145153928548098
          total_loss: 10.00192642211914
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,59,919.948,3775060.0,-205676,-197809,-283227,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,59,919.948,3775060.0,-205676,-197809,-283227,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,59,919.948,3775060.0,-205676,-197809,-283227,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3839040
  counters:
    num_agent_steps_sampled: 3839040
    num_agent_steps_trained: 3839040
    num_env_steps_sampled: 3839040
    num_env_steps_trained: 3839040
  custom_metrics: {}
  date: 2022-10-14_12-25-51
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197490.19658962815
  episode_reward_mean: -204503.20313254744
  episode_reward_min: -213930.8072238365
  episodes_this_iter: 60
  episodes_total: 3828
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.079108476638794
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017297810409218073
          model: {}
          policy_loss: 0.0016057547181844711
          total_loss: 10.001644134521484
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,60,935.477,3839040.0,-204503,-197490,-213931,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,60,935.477,3839040.0,-204503,-197490,-213931,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,60,935.477,3839040.0,-204503,-197490,-213931,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3903024
  counters:
    num_agent_steps_sampled: 3903024
    num_agent_steps_trained: 3903024
    num_env_steps_sampled: 3903024
    num_env_steps_trained: 3903024
  custom_metrics: {}
  date: 2022-10-14_12-26-06
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198927.37062635864
  episode_reward_mean: -205102.90477094988
  episode_reward_min: -234826.79584738126
  episodes_this_iter: 72
  episodes_total: 3900
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.07684326171875
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001798426965251565
          model: {}
          policy_loss: 0.006349161267280579
          total_loss: 10.006400108337402
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,61,950.854,3903020.0,-205103,-198927,-234827,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,61,950.854,3903020.0,-205103,-198927,-234827,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 3967008
  counters:
    num_agent_steps_sampled: 3967008
    num_agent_steps_trained: 3967008
    num_env_steps_sampled: 3967008
    num_env_steps_trained: 3967008
  custom_metrics: {}
  date: 2022-10-14_12-26-21
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198588.3140241591
  episode_reward_mean: -207207.14721486828
  episode_reward_min: -269937.2523143925
  episodes_this_iter: 60
  episodes_total: 3960
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0740396976470947
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0024448654148727655
          model: {}
          policy_loss: 0.0036656130105257034
          total_loss: 10.003847122192383
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,62,965.863,3967010.0,-207207,-198588,-269937,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,62,965.863,3967010.0,-207207,-198588,-269937,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,62,965.863,3967010.0,-207207,-198588,-269937,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4030992
  counters:
    num_agent_steps_sampled: 4030992
    num_agent_steps_trained: 4030992
    num_env_steps_sampled: 4030992
    num_env_steps_trained: 4030992
  custom_metrics: {}
  date: 2022-10-14_12-26-36
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197108.43548622134
  episode_reward_mean: -205820.62776283437
  episode_reward_min: -253133.90261827092
  episodes_this_iter: 60
  episodes_total: 4020
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0630714893341064
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002168940845876932
          model: {}
          policy_loss: 0.002763541415333748
          total_loss: 10.002890586853027
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,63,981.059,4030990.0,-205821,-197108,-253134,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,63,981.059,4030990.0,-205821,-197108,-253134,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4094976
  counters:
    num_agent_steps_sampled: 4094976
    num_agent_steps_trained: 4094976
    num_env_steps_sampled: 4094976
    num_env_steps_trained: 4094976
  custom_metrics: {}
  date: 2022-10-14_12-26-52
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198166.32729571735
  episode_reward_mean: -204170.79646591446
  episode_reward_min: -213961.69659830548
  episodes_this_iter: 72
  episodes_total: 4092
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.057204484939575
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0018753528129309416
          model: {}
          policy_loss: 0.005624176934361458
          total_loss: 10.005694389343262
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,64,996.213,4094980.0,-204171,-198166,-213962,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,64,996.213,4094980.0,-204171,-198166,-213962,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,64,996.213,4094980.0,-204171,-198166,-213962,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4158960
  counters:
    num_agent_steps_sampled: 4158960
    num_agent_steps_trained: 4158960
    num_env_steps_sampled: 4158960
    num_env_steps_trained: 4158960
  custom_metrics: {}
  date: 2022-10-14_12-27-07
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198189.26817320997
  episode_reward_mean: -203772.64477314064
  episode_reward_min: -219985.65816770607
  episodes_this_iter: 60
  episodes_total: 4152
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.054586172103882
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0020445608533918858
          model: {}
          policy_loss: 0.0023171440698206425
          total_loss: 10.002420425415039
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,65,1011.95,4158960.0,-203773,-198189,-219986,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,65,1011.95,4158960.0,-203773,-198189,-219986,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,65,1011.95,4158960.0,-203773,-198189,-219986,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4222944
  counters:
    num_agent_steps_sampled: 4222944
    num_agent_steps_trained: 4222944
    num_env_steps_sampled: 4222944
    num_env_steps_trained: 4222944
  custom_metrics: {}
  date: 2022-10-14_12-27-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196593.62263427526
  episode_reward_mean: -204697.88013235683
  episode_reward_min: -256606.001004404
  episodes_this_iter: 60
  episodes_total: 4212
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0513429641723633
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002177581889554858
          model: {}
          policy_loss: 0.0005826698034070432
          total_loss: 10.000713348388672
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,66,1027.4,4222940.0,-204698,-196594,-256606,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,66,1027.4,4222940.0,-204698,-196594,-256606,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4286928
  counters:
    num_agent_steps_sampled: 4286928
    num_agent_steps_trained: 4286928
    num_env_steps_sampled: 4286928
    num_env_steps_trained: 4286928
  custom_metrics: {}
  date: 2022-10-14_12-27-38
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196593.62263427526
  episode_reward_mean: -204676.488779084
  episode_reward_min: -256606.001004404
  episodes_this_iter: 72
  episodes_total: 4284
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.05147123336792
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0018241916550323367
          model: {}
          policy_loss: 0.002646371955052018
          total_loss: 10.002706527709961
          vf_explained_var

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,67,1042.8,4286930.0,-204676,-196594,-256606,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,67,1042.8,4286930.0,-204676,-196594,-256606,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,67,1042.8,4286930.0,-204676,-196594,-256606,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4350912
  counters:
    num_agent_steps_sampled: 4350912
    num_agent_steps_trained: 4350912
    num_env_steps_sampled: 4350912
    num_env_steps_trained: 4350912
  custom_metrics: {}
  date: 2022-10-14_12-27-54
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196331.4015021484
  episode_reward_mean: -204055.6499073507
  episode_reward_min: -210132.73478825073
  episodes_this_iter: 60
  episodes_total: 4344
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0482003688812256
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002070184564217925
          model: {}
          policy_loss: 0.0016516397008672357
          total_loss: 10.001760482788086
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,68,1058.69,4350910.0,-204056,-196331,-210133,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,68,1058.69,4350910.0,-204056,-196331,-210133,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,68,1058.69,4350910.0,-204056,-196331,-210133,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4414896
  counters:
    num_agent_steps_sampled: 4414896
    num_agent_steps_trained: 4414896
    num_env_steps_sampled: 4414896
    num_env_steps_trained: 4414896
  custom_metrics: {}
  date: 2022-10-14_12-28-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198138.66982923675
  episode_reward_mean: -204275.64610600204
  episode_reward_min: -213686.58678551123
  episodes_this_iter: 60
  episodes_total: 4404
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0484304428100586
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0022594621405005455
          model: {}
          policy_loss: -0.004959865938872099
          total_loss: 9.99518871307373
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,69,1073.95,4414900.0,-204276,-198139,-213687,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,69,1073.95,4414900.0,-204276,-198139,-213687,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,69,1073.95,4414900.0,-204276,-198139,-213687,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4478880
  counters:
    num_agent_steps_sampled: 4478880
    num_agent_steps_trained: 4478880
    num_env_steps_sampled: 4478880
    num_env_steps_trained: 4478880
  custom_metrics: {}
  date: 2022-10-14_12-28-25
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197926.93057874523
  episode_reward_mean: -203912.76286958612
  episode_reward_min: -214524.4927342088
  episodes_this_iter: 72
  episodes_total: 4476
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.050753116607666
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001667803735472262
          model: {}
          policy_loss: 0.002219259738922119
          total_loss: 10.00224781036377
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,70,1089.53,4478880.0,-203913,-197927,-214524,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,70,1089.53,4478880.0,-203913,-197927,-214524,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,70,1089.53,4478880.0,-203913,-197927,-214524,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4542864
  counters:
    num_agent_steps_sampled: 4542864
    num_agent_steps_trained: 4542864
    num_env_steps_sampled: 4542864
    num_env_steps_trained: 4542864
  custom_metrics: {}
  date: 2022-10-14_12-28-41
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -198688.24192980622
  episode_reward_mean: -203448.04673760623
  episode_reward_min: -214524.4927342088
  episodes_this_iter: 60
  episodes_total: 4536
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.045938730239868
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019442883785814047
          model: {}
          policy_loss: 0.004992700647562742
          total_loss: 10.005077362060547
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,71,1105.3,4542860.0,-203448,-198688,-214524,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,71,1105.3,4542860.0,-203448,-198688,-214524,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,71,1105.3,4542860.0,-203448,-198688,-214524,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4606848
  counters:
    num_agent_steps_sampled: 4606848
    num_agent_steps_trained: 4606848
    num_env_steps_sampled: 4606848
    num_env_steps_trained: 4606848
  custom_metrics: {}
  date: 2022-10-14_12-28-57
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197896.68152869362
  episode_reward_mean: -203348.4422838128
  episode_reward_min: -218315.8232404109
  episodes_this_iter: 60
  episodes_total: 4596
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0451269149780273
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019354162504896522
          model: {}
          policy_loss: 0.001067624893039465
          total_loss: 10.001150131225586
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,72,1121.21,4606850.0,-203348,-197897,-218316,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,72,1121.21,4606850.0,-203348,-197897,-218316,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,72,1121.21,4606850.0,-203348,-197897,-218316,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4670832
  counters:
    num_agent_steps_sampled: 4670832
    num_agent_steps_trained: 4670832
    num_env_steps_sampled: 4670832
    num_env_steps_trained: 4670832
  custom_metrics: {}
  date: 2022-10-14_12-29-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196313.45008928442
  episode_reward_mean: -203218.1880100631
  episode_reward_min: -213828.99293795438
  episodes_this_iter: 72
  episodes_total: 4668
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.051132917404175
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0020879467483609915
          model: {}
          policy_loss: 0.006989163812249899
          total_loss: 10.007102012634277
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,73,1136.58,4670830.0,-203218,-196313,-213829,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,73,1136.58,4670830.0,-203218,-196313,-213829,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4734816
  counters:
    num_agent_steps_sampled: 4734816
    num_agent_steps_trained: 4734816
    num_env_steps_sampled: 4734816
    num_env_steps_trained: 4734816
  custom_metrics: {}
  date: 2022-10-14_12-29-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196313.45008928442
  episode_reward_mean: -204342.92212778374
  episode_reward_min: -282814.3090520009
  episodes_this_iter: 60
  episodes_total: 4728
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.046569347381592
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0024900096468627453
          model: {}
          policy_loss: 0.0009922027820721269
          total_loss: 10.001185417175293
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,74,1152.05,4734820.0,-204343,-196313,-282814,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,74,1152.05,4734820.0,-204343,-196313,-282814,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4798800
  counters:
    num_agent_steps_sampled: 4798800
    num_agent_steps_trained: 4798800
    num_env_steps_sampled: 4798800
    num_env_steps_trained: 4798800
  custom_metrics: {}
  date: 2022-10-14_12-29-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -197430.25571518549
  episode_reward_mean: -204027.2744258288
  episode_reward_min: -282814.3090520009
  episodes_this_iter: 60
  episodes_total: 4788
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0412936210632324
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002029765397310257
          model: {}
          policy_loss: 6.987573578953743e-05
          total_loss: 10.000170707702637
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,75,1166.93,4798800.0,-204027,-197430,-282814,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,75,1166.93,4798800.0,-204027,-197430,-282814,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,75,1166.93,4798800.0,-204027,-197430,-282814,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4862784
  counters:
    num_agent_steps_sampled: 4862784
    num_agent_steps_trained: 4862784
    num_env_steps_sampled: 4862784
    num_env_steps_trained: 4862784
  custom_metrics: {}
  date: 2022-10-14_12-29-58
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196178.24050406768
  episode_reward_mean: -204345.6093412924
  episode_reward_min: -270032.6033101867
  episodes_this_iter: 72
  episodes_total: 4860
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0453131198883057
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0022886618971824646
          model: {}
          policy_loss: 0.006944410502910614
          total_loss: 10.007097244262695
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,76,1182.74,4862780.0,-204346,-196178,-270033,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,76,1182.74,4862780.0,-204346,-196178,-270033,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4926768
  counters:
    num_agent_steps_sampled: 4926768
    num_agent_steps_trained: 4926768
    num_env_steps_sampled: 4926768
    num_env_steps_trained: 4926768
  custom_metrics: {}
  date: 2022-10-14_12-30-13
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195866.69122603137
  episode_reward_mean: -203604.62479654522
  episode_reward_min: -270032.6033101867
  episodes_this_iter: 60
  episodes_total: 4920
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0443601608276367
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001945028081536293
          model: {}
          policy_loss: 0.0031095538288354874
          total_loss: 10.003194808959961
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,77,1197.71,4926770.0,-203605,-195867,-270033,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,77,1197.71,4926770.0,-203605,-195867,-270033,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,77,1197.71,4926770.0,-203605,-195867,-270033,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 4990752
  counters:
    num_agent_steps_sampled: 4990752
    num_agent_steps_trained: 4990752
    num_env_steps_sampled: 4990752
    num_env_steps_trained: 4990752
  custom_metrics: {}
  date: 2022-10-14_12-30-29
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195866.69122603137
  episode_reward_mean: -202392.6525821397
  episode_reward_min: -210127.84984943643
  episodes_this_iter: 60
  episodes_total: 4980
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.049996852874756
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0018802082631736994
          model: {}
          policy_loss: 0.0023330599069595337
          total_loss: 10.00240421295166
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,78,1213,4990750.0,-202393,-195867,-210128,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,78,1213,4990750.0,-202393,-195867,-210128,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5054736
  counters:
    num_agent_steps_sampled: 5054736
    num_agent_steps_trained: 5054736
    num_env_steps_sampled: 5054736
    num_env_steps_trained: 5054736
  custom_metrics: {}
  date: 2022-10-14_12-30-44
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -196014.3391998138
  episode_reward_mean: -202177.3241307193
  episode_reward_min: -210127.84984943643
  episodes_this_iter: 72
  episodes_total: 5052
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0589303970336914
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017351630376651883
          model: {}
          policy_loss: 0.006487692706286907
          total_loss: 10.006529808044434
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,79,1227.97,5054740.0,-202177,-196014,-210128,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,79,1227.97,5054740.0,-202177,-196014,-210128,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,79,1227.97,5054740.0,-202177,-196014,-210128,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5118720
  counters:
    num_agent_steps_sampled: 5118720
    num_agent_steps_trained: 5118720
    num_env_steps_sampled: 5118720
    num_env_steps_trained: 5118720
  custom_metrics: {}
  date: 2022-10-14_12-30-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195035.87452056087
  episode_reward_mean: -201972.88153149252
  episode_reward_min: -213750.19572346038
  episodes_this_iter: 60
  episodes_total: 5112
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.062365770339966
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0014694205019623041
          model: {}
          policy_loss: 0.0027210917323827744
          total_loss: 10.002708435058594
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,80,1243.16,5118720.0,-201973,-195036,-213750,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,80,1243.16,5118720.0,-201973,-195036,-213750,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,80,1243.16,5118720.0,-201973,-195036,-213750,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5182704
  counters:
    num_agent_steps_sampled: 5182704
    num_agent_steps_trained: 5182704
    num_env_steps_sampled: 5182704
    num_env_steps_trained: 5182704
  custom_metrics: {}
  date: 2022-10-14_12-31-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195035.87452056087
  episode_reward_mean: -202150.0500864693
  episode_reward_min: -213697.08041271794
  episodes_this_iter: 60
  episodes_total: 5172
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.060580253601074
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017315176082774997
          model: {}
          policy_loss: 0.0021941419690847397
          total_loss: 10.002233505249023
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,81,1259.11,5182700.0,-202150,-195036,-213697,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,81,1259.11,5182700.0,-202150,-195036,-213697,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5246688
  counters:
    num_agent_steps_sampled: 5246688
    num_agent_steps_trained: 5246688
    num_env_steps_sampled: 5246688
    num_env_steps_trained: 5246688
  custom_metrics: {}
  date: 2022-10-14_12-31-30
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195961.7913125172
  episode_reward_mean: -201616.5387006141
  episode_reward_min: -213826.48647899483
  episodes_this_iter: 72
  episodes_total: 5244
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0633647441864014
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0013658785028383136
          model: {}
          policy_loss: 0.0071385642513632774
          total_loss: 10.00710678100586
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,82,1274.44,5246690.0,-201617,-195962,-213826,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,82,1274.44,5246690.0,-201617,-195962,-213826,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,82,1274.44,5246690.0,-201617,-195962,-213826,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,82,1274.44,5246690.0,-201617,-195962,-213826,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5310672
  counters:
    num_agent_steps_sampled: 5310672
    num_agent_steps_trained: 5310672
    num_env_steps_sampled: 5310672
    num_env_steps_trained: 5310672
  custom_metrics: {}
  date: 2022-10-14_12-31-47
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195057.95553914396
  episode_reward_mean: -201571.92261616435
  episode_reward_min: -213826.48647899483
  episodes_this_iter: 60
  episodes_total: 5304
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0545639991760254
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017130468040704727
          model: {}
          policy_loss: 0.0030614566057920456
          total_loss: 10.003098487854004
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,83,1290.86,5310670.0,-201572,-195058,-213826,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,83,1290.86,5310670.0,-201572,-195058,-213826,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,83,1290.86,5310670.0,-201572,-195058,-213826,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5374656
  counters:
    num_agent_steps_sampled: 5374656
    num_agent_steps_trained: 5374656
    num_env_steps_sampled: 5374656
    num_env_steps_trained: 5374656
  custom_metrics: {}
  date: 2022-10-14_12-32-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -195057.95553914396
  episode_reward_mean: -201496.97528478503
  episode_reward_min: -221611.36277709386
  episodes_this_iter: 60
  episodes_total: 5364
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.059065341949463
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002208181656897068
          model: {}
          policy_loss: 0.0010320725850760937
          total_loss: 10.001167297363281
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,84,1306.12,5374660.0,-201497,-195058,-221611,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,84,1306.12,5374660.0,-201497,-195058,-221611,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,84,1306.12,5374660.0,-201497,-195058,-221611,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5438640
  counters:
    num_agent_steps_sampled: 5438640
    num_agent_steps_trained: 5438640
    num_env_steps_sampled: 5438640
    num_env_steps_trained: 5438640
  custom_metrics: {}
  date: 2022-10-14_12-32-19
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194091.68142748435
  episode_reward_mean: -201011.30953872076
  episode_reward_min: -221611.36277709386
  episodes_this_iter: 72
  episodes_total: 5436
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.055572509765625
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0021894818637520075
          model: {}
          policy_loss: 0.005966085009276867
          total_loss: 10.006097793579102
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,85,1322.56,5438640.0,-201011,-194092,-221611,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,85,1322.56,5438640.0,-201011,-194092,-221611,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,85,1322.56,5438640.0,-201011,-194092,-221611,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5502624
  counters:
    num_agent_steps_sampled: 5502624
    num_agent_steps_trained: 5502624
    num_env_steps_sampled: 5502624
    num_env_steps_trained: 5502624
  custom_metrics: {}
  date: 2022-10-14_12-32-34
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194091.68142748435
  episode_reward_mean: -200901.06695715763
  episode_reward_min: -208997.92061321015
  episodes_this_iter: 60
  episodes_total: 5496
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.054316520690918
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0021568210795521736
          model: {}
          policy_loss: 0.0023503247648477554
          total_loss: 10.002476692199707
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,86,1338.27,5502620.0,-200901,-194092,-208998,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,86,1338.27,5502620.0,-200901,-194092,-208998,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,86,1338.27,5502620.0,-200901,-194092,-208998,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5566608
  counters:
    num_agent_steps_sampled: 5566608
    num_agent_steps_trained: 5566608
    num_env_steps_sampled: 5566608
    num_env_steps_trained: 5566608
  custom_metrics: {}
  date: 2022-10-14_12-32-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192988.35553517897
  episode_reward_mean: -200488.17914484377
  episode_reward_min: -208997.92061321015
  episodes_this_iter: 60
  episodes_total: 5556
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.043914318084717
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0016257179668173194
          model: {}
          policy_loss: 0.0022464680951088667
          total_loss: 10.002266883850098
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,87,1353.79,5566610.0,-200488,-192988,-208998,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,87,1353.79,5566610.0,-200488,-192988,-208998,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,87,1353.79,5566610.0,-200488,-192988,-208998,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5630592
  counters:
    num_agent_steps_sampled: 5630592
    num_agent_steps_trained: 5630592
    num_env_steps_sampled: 5630592
    num_env_steps_trained: 5630592
  custom_metrics: {}
  date: 2022-10-14_12-33-06
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192988.35553517897
  episode_reward_mean: -199419.4541522317
  episode_reward_min: -207235.46835697992
  episodes_this_iter: 72
  episodes_total: 5628
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0375723838806152
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001982492860406637
          model: {}
          policy_loss: 0.005671824794262648
          total_loss: 10.005764961242676
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,88,1369.64,5630590.0,-199419,-192988,-207235,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,88,1369.64,5630590.0,-199419,-192988,-207235,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,88,1369.64,5630590.0,-199419,-192988,-207235,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5694576
  counters:
    num_agent_steps_sampled: 5694576
    num_agent_steps_trained: 5694576
    num_env_steps_sampled: 5694576
    num_env_steps_trained: 5694576
  custom_metrics: {}
  date: 2022-10-14_12-33-21
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194367.24266126033
  episode_reward_mean: -198898.0163877001
  episode_reward_min: -204651.90629439885
  episodes_this_iter: 60
  episodes_total: 5688
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0394766330718994
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017254347912967205
          model: {}
          policy_loss: 0.0024262634105980396
          total_loss: 10.00246810913086
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,89,1385.12,5694580.0,-198898,-194367,-204652,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,89,1385.12,5694580.0,-198898,-194367,-204652,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,89,1385.12,5694580.0,-198898,-194367,-204652,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5758560
  counters:
    num_agent_steps_sampled: 5758560
    num_agent_steps_trained: 5758560
    num_env_steps_sampled: 5758560
    num_env_steps_trained: 5758560
  custom_metrics: {}
  date: 2022-10-14_12-33-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194595.36176408423
  episode_reward_mean: -199700.69465492442
  episode_reward_min: -209327.0679408759
  episodes_this_iter: 60
  episodes_total: 5748
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0456252098083496
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0021726470440626144
          model: {}
          policy_loss: -0.004090239759534597
          total_loss: 9.996039390563965
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,90,1401.09,5758560.0,-199701,-194595,-209327,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,90,1401.09,5758560.0,-199701,-194595,-209327,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,90,1401.09,5758560.0,-199701,-194595,-209327,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5822544
  counters:
    num_agent_steps_sampled: 5822544
    num_agent_steps_trained: 5822544
    num_env_steps_sampled: 5822544
    num_env_steps_trained: 5822544
  custom_metrics: {}
  date: 2022-10-14_12-33-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194889.12445942633
  episode_reward_mean: -199605.70221577835
  episode_reward_min: -205274.8734062402
  episodes_this_iter: 72
  episodes_total: 5820
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0490639209747314
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0022852953989058733
          model: {}
          policy_loss: 0.0021264732349663973
          total_loss: 10.002280235290527
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,91,1416.79,5822540.0,-199606,-194889,-205275,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,91,1416.79,5822540.0,-199606,-194889,-205275,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,91,1416.79,5822540.0,-199606,-194889,-205275,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5886528
  counters:
    num_agent_steps_sampled: 5886528
    num_agent_steps_trained: 5886528
    num_env_steps_sampled: 5886528
    num_env_steps_trained: 5886528
  custom_metrics: {}
  date: 2022-10-14_12-34-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194152.4825892386
  episode_reward_mean: -198957.07322699868
  episode_reward_min: -205007.7785190491
  episodes_this_iter: 60
  episodes_total: 5880
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0429794788360596
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019301999127492309
          model: {}
          policy_loss: 0.0056528192944824696
          total_loss: 10.005733489990234
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,92,1432.61,5886530.0,-198957,-194152,-205008,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,92,1432.61,5886530.0,-198957,-194152,-205008,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,92,1432.61,5886530.0,-198957,-194152,-205008,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 5950512
  counters:
    num_agent_steps_sampled: 5950512
    num_agent_steps_trained: 5950512
    num_env_steps_sampled: 5950512
    num_env_steps_trained: 5950512
  custom_metrics: {}
  date: 2022-10-14_12-34-24
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -194152.4825892386
  episode_reward_mean: -199170.98882802262
  episode_reward_min: -208635.49914251655
  episodes_this_iter: 60
  episodes_total: 5940
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0425162315368652
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0013906549429520965
          model: {}
          policy_loss: 0.00277988170273602
          total_loss: 10.002754211425781
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,93,1448.17,5950510.0,-199171,-194152,-208635,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,93,1448.17,5950510.0,-199171,-194152,-208635,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,93,1448.17,5950510.0,-199171,-194152,-208635,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6014496
  counters:
    num_agent_steps_sampled: 6014496
    num_agent_steps_trained: 6014496
    num_env_steps_sampled: 6014496
    num_env_steps_trained: 6014496
  custom_metrics: {}
  date: 2022-10-14_12-34-40
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -193975.2361685401
  episode_reward_mean: -199101.25982154146
  episode_reward_min: -210580.39865432636
  episodes_this_iter: 72
  episodes_total: 6012
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0345237255096436
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002613974967971444
          model: {}
          policy_loss: 6.164518708828837e-05
          total_loss: 10.000280380249023
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,94,1464.09,6014500.0,-199101,-193975,-210580,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,94,1464.09,6014500.0,-199101,-193975,-210580,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,94,1464.09,6014500.0,-199101,-193975,-210580,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6078480
  counters:
    num_agent_steps_sampled: 6078480
    num_agent_steps_trained: 6078480
    num_env_steps_sampled: 6078480
    num_env_steps_trained: 6078480
  custom_metrics: {}
  date: 2022-10-14_12-34-56
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192873.30516481274
  episode_reward_mean: -198575.63559803006
  episode_reward_min: -210580.39865432636
  episodes_this_iter: 60
  episodes_total: 6072
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0366756916046143
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017711444525048137
          model: {}
          policy_loss: 0.001527270651422441
          total_loss: 10.001578330993652
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,95,1479.61,6078480.0,-198576,-192873,-210580,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,95,1479.61,6078480.0,-198576,-192873,-210580,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,95,1479.61,6078480.0,-198576,-192873,-210580,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6142464
  counters:
    num_agent_steps_sampled: 6142464
    num_agent_steps_trained: 6142464
    num_env_steps_sampled: 6142464
    num_env_steps_trained: 6142464
  custom_metrics: {}
  date: 2022-10-14_12-35-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192873.30516481274
  episode_reward_mean: -198270.58298801142
  episode_reward_min: -208243.24658263824
  episodes_this_iter: 60
  episodes_total: 6132
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0328190326690674
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001983275404199958
          model: {}
          policy_loss: 0.0016473153373226523
          total_loss: 10.001740455627441
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,96,1495.47,6142460.0,-198271,-192873,-208243,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,96,1495.47,6142460.0,-198271,-192873,-208243,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,96,1495.47,6142460.0,-198271,-192873,-208243,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6206448
  counters:
    num_agent_steps_sampled: 6206448
    num_agent_steps_trained: 6206448
    num_env_steps_sampled: 6206448
    num_env_steps_trained: 6206448
  custom_metrics: {}
  date: 2022-10-14_12-35-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192437.20738004986
  episode_reward_mean: -198226.58898928226
  episode_reward_min: -208846.4480127758
  episodes_this_iter: 72
  episodes_total: 6204
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0334360599517822
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019070057896897197
          model: {}
          policy_loss: 0.006527278572320938
          total_loss: 10.00660514831543
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,97,1511.47,6206450.0,-198227,-192437,-208846,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,97,1511.47,6206450.0,-198227,-192437,-208846,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,97,1511.47,6206450.0,-198227,-192437,-208846,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6270432
  counters:
    num_agent_steps_sampled: 6270432
    num_agent_steps_trained: 6270432
    num_env_steps_sampled: 6270432
    num_env_steps_trained: 6270432
  custom_metrics: {}
  date: 2022-10-14_12-35-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192437.20738004986
  episode_reward_mean: -199223.44076962967
  episode_reward_min: -218552.6417391568
  episodes_this_iter: 60
  episodes_total: 6264
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.03695011138916
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019296678947284818
          model: {}
          policy_loss: 0.006028033792972565
          total_loss: 10.006110191345215
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,98,1526.82,6270430.0,-199223,-192437,-218553,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,98,1526.82,6270430.0,-199223,-192437,-218553,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,98,1526.82,6270430.0,-199223,-192437,-218553,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6334416
  counters:
    num_agent_steps_sampled: 6334416
    num_agent_steps_trained: 6334416
    num_env_steps_sampled: 6334416
    num_env_steps_trained: 6334416
  custom_metrics: {}
  date: 2022-10-14_12-35-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -193058.5024655844
  episode_reward_mean: -199215.05146092325
  episode_reward_min: -219839.44927017175
  episodes_this_iter: 60
  episodes_total: 6324
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.032841205596924
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0022375923581421375
          model: {}
          policy_loss: 0.0005803573876619339
          total_loss: 10.000724792480469
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,99,1542.69,6334420.0,-199215,-193059,-219839,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,99,1542.69,6334420.0,-199215,-193059,-219839,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,99,1542.69,6334420.0,-199215,-193059,-219839,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6398400
  counters:
    num_agent_steps_sampled: 6398400
    num_agent_steps_trained: 6398400
    num_env_steps_sampled: 6398400
    num_env_steps_trained: 6398400
  custom_metrics: {}
  date: 2022-10-14_12-36-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191804.9193443664
  episode_reward_mean: -198453.81705990777
  episode_reward_min: -219839.44927017175
  episodes_this_iter: 72
  episodes_total: 6396
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0192806720733643
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002408919157460332
          model: {}
          policy_loss: 0.003906775265932083
          total_loss: 10.0040864944458
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,100,1558.13,6398400.0,-198454,-191805,-219839,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,100,1558.13,6398400.0,-198454,-191805,-219839,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,100,1558.13,6398400.0,-198454,-191805,-219839,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6462384
  counters:
    num_agent_steps_sampled: 6462384
    num_agent_steps_trained: 6462384
    num_env_steps_sampled: 6462384
    num_env_steps_trained: 6462384
  custom_metrics: {}
  date: 2022-10-14_12-36-31
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191804.9193443664
  episode_reward_mean: -197512.129222581
  episode_reward_min: -205941.64192320177
  episodes_this_iter: 60
  episodes_total: 6456
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.027315139770508
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0021440559066832066
          model: {}
          policy_loss: 0.004059511236846447
          total_loss: 10.004185676574707
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,101,1573.99,6462380.0,-197512,-191805,-205942,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,101,1573.99,6462380.0,-197512,-191805,-205942,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,101,1573.99,6462380.0,-197512,-191805,-205942,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6526368
  counters:
    num_agent_steps_sampled: 6526368
    num_agent_steps_trained: 6526368
    num_env_steps_sampled: 6526368
    num_env_steps_trained: 6526368
  custom_metrics: {}
  date: 2022-10-14_12-36-46
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -192487.8989052338
  episode_reward_mean: -197452.12697655745
  episode_reward_min: -204639.5919588653
  episodes_this_iter: 60
  episodes_total: 6516
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.024416446685791
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002439779695123434
          model: {}
          policy_loss: 0.0012100031599402428
          total_loss: 10.001395225524902
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,102,1589.54,6526370.0,-197452,-192488,-204640,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,102,1589.54,6526370.0,-197452,-192488,-204640,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,102,1589.54,6526370.0,-197452,-192488,-204640,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6590352
  counters:
    num_agent_steps_sampled: 6590352
    num_agent_steps_trained: 6590352
    num_env_steps_sampled: 6590352
    num_env_steps_trained: 6590352
  custom_metrics: {}
  date: 2022-10-14_12-37-03
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191299.0942261166
  episode_reward_mean: -197158.6196325903
  episode_reward_min: -210614.4027939816
  episodes_this_iter: 72
  episodes_total: 6588
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.024167537689209
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0016967899864539504
          model: {}
          policy_loss: 0.006191089749336243
          total_loss: 10.006227493286133
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,103,1606.01,6590350.0,-197159,-191299,-210614,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,103,1606.01,6590350.0,-197159,-191299,-210614,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,103,1606.01,6590350.0,-197159,-191299,-210614,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6654336
  counters:
    num_agent_steps_sampled: 6654336
    num_agent_steps_trained: 6654336
    num_env_steps_sampled: 6654336
    num_env_steps_trained: 6654336
  custom_metrics: {}
  date: 2022-10-14_12-37-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191299.0942261166
  episode_reward_mean: -196438.31189220317
  episode_reward_min: -210614.4027939816
  episodes_this_iter: 60
  episodes_total: 6648
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.024392604827881
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002407497726380825
          model: {}
          policy_loss: 0.0035545225255191326
          total_loss: 10.00373363494873
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,104,1621.68,6654340.0,-196438,-191299,-210614,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,104,1621.68,6654340.0,-196438,-191299,-210614,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,104,1621.68,6654340.0,-196438,-191299,-210614,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6718320
  counters:
    num_agent_steps_sampled: 6718320
    num_agent_steps_trained: 6718320
    num_env_steps_sampled: 6718320
    num_env_steps_trained: 6718320
  custom_metrics: {}
  date: 2022-10-14_12-37-34
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191437.47197951755
  episode_reward_mean: -196329.3758959074
  episode_reward_min: -204483.8761033816
  episodes_this_iter: 60
  episodes_total: 6708
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.018235921859741
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002099212259054184
          model: {}
          policy_loss: 0.0012371838092803955
          total_loss: 10.00135612487793
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,105,1637.65,6718320.0,-196329,-191437,-204484,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,105,1637.65,6718320.0,-196329,-191437,-204484,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,105,1637.65,6718320.0,-196329,-191437,-204484,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6782304
  counters:
    num_agent_steps_sampled: 6782304
    num_agent_steps_trained: 6782304
    num_env_steps_sampled: 6782304
    num_env_steps_trained: 6782304
  custom_metrics: {}
  date: 2022-10-14_12-37-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -191120.4162198066
  episode_reward_mean: -196000.91845057037
  episode_reward_min: -202025.45265193298
  episodes_this_iter: 72
  episodes_total: 6780
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0104191303253174
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0021656907629221678
          model: {}
          policy_loss: 0.005692791659384966
          total_loss: 10.005824089050293
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,106,1653.65,6782300.0,-196001,-191120,-202025,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,106,1653.65,6782300.0,-196001,-191120,-202025,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,106,1653.65,6782300.0,-196001,-191120,-202025,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6846288
  counters:
    num_agent_steps_sampled: 6846288
    num_agent_steps_trained: 6846288
    num_env_steps_sampled: 6846288
    num_env_steps_trained: 6846288
  custom_metrics: {}
  date: 2022-10-14_12-38-07
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -190040.3072940065
  episode_reward_mean: -195848.4353621638
  episode_reward_min: -207990.14039927555
  episodes_this_iter: 60
  episodes_total: 6840
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0070641040802
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0016554485773667693
          model: {}
          policy_loss: 0.004226282704621553
          total_loss: 10.004257202148438
          vf_explained_va

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,107,1669.93,6846290.0,-195848,-190040,-207990,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,107,1669.93,6846290.0,-195848,-190040,-207990,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,107,1669.93,6846290.0,-195848,-190040,-207990,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6910272
  counters:
    num_agent_steps_sampled: 6910272
    num_agent_steps_trained: 6910272
    num_env_steps_sampled: 6910272
    num_env_steps_trained: 6910272
  custom_metrics: {}
  date: 2022-10-14_12-38-22
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -190040.3072940065
  episode_reward_mean: -195939.40773981708
  episode_reward_min: -219798.43942692122
  episodes_this_iter: 60
  episodes_total: 6900
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 3.0011799335479736
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002226785058155656
          model: {}
          policy_loss: 0.0006265025585889816
          total_loss: 10.000771522521973
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,108,1685.51,6910270.0,-195939,-190040,-219798,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,108,1685.51,6910270.0,-195939,-190040,-219798,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,108,1685.51,6910270.0,-195939,-190040,-219798,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 6974256
  counters:
    num_agent_steps_sampled: 6974256
    num_agent_steps_trained: 6974256
    num_env_steps_sampled: 6974256
    num_env_steps_trained: 6974256
  custom_metrics: {}
  date: 2022-10-14_12-38-39
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -190453.63501521782
  episode_reward_mean: -196501.47989346142
  episode_reward_min: -219798.43942692122
  episodes_this_iter: 72
  episodes_total: 6972
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9918274879455566
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0018737147329375148
          model: {}
          policy_loss: 0.004109811037778854
          total_loss: 10.004185676574707
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,109,1701.88,6974260.0,-196501,-190454,-219798,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,109,1701.88,6974260.0,-196501,-190454,-219798,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,109,1701.88,6974260.0,-196501,-190454,-219798,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7038240
  counters:
    num_agent_steps_sampled: 7038240
    num_agent_steps_trained: 7038240
    num_env_steps_sampled: 7038240
    num_env_steps_trained: 7038240
  custom_metrics: {}
  date: 2022-10-14_12-38-55
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -190948.7451762392
  episode_reward_mean: -196266.6091879642
  episode_reward_min: -212856.63433354645
  episodes_this_iter: 60
  episodes_total: 7032
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.983985185623169
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0023628149647265673
          model: {}
          policy_loss: 0.003139384323731065
          total_loss: 10.003313064575195
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,110,1717.77,7038240.0,-196267,-190949,-212857,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,110,1717.77,7038240.0,-196267,-190949,-212857,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,110,1717.77,7038240.0,-196267,-190949,-212857,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7102224
  counters:
    num_agent_steps_sampled: 7102224
    num_agent_steps_trained: 7102224
    num_env_steps_sampled: 7102224
    num_env_steps_trained: 7102224
  custom_metrics: {}
  date: 2022-10-14_12-39-11
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189399.61783644816
  episode_reward_mean: -195452.09066737897
  episode_reward_min: -212856.63433354645
  episodes_this_iter: 60
  episodes_total: 7092
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9849698543548584
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002009602729231119
          model: {}
          policy_loss: 0.0008205441408790648
          total_loss: 10.000924110412598
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,111,1733.63,7102220.0,-195452,-189400,-212857,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,111,1733.63,7102220.0,-195452,-189400,-212857,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,111,1733.63,7102220.0,-195452,-189400,-212857,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7166208
  counters:
    num_agent_steps_sampled: 7166208
    num_agent_steps_trained: 7166208
    num_env_steps_sampled: 7166208
    num_env_steps_trained: 7166208
  custom_metrics: {}
  date: 2022-10-14_12-39-27
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189399.61783644816
  episode_reward_mean: -194889.10118889128
  episode_reward_min: -210502.88022200754
  episodes_this_iter: 72
  episodes_total: 7164
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9910898208618164
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002140353200957179
          model: {}
          policy_loss: 0.0022262465208768845
          total_loss: 10.002354621887207
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,112,1749.57,7166210.0,-194889,-189400,-210503,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,112,1749.57,7166210.0,-194889,-189400,-210503,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,112,1749.57,7166210.0,-194889,-189400,-210503,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7230192
  counters:
    num_agent_steps_sampled: 7230192
    num_agent_steps_trained: 7230192
    num_env_steps_sampled: 7230192
    num_env_steps_trained: 7230192
  custom_metrics: {}
  date: 2022-10-14_12-39-42
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189441.35088448302
  episode_reward_mean: -195285.61356435623
  episode_reward_min: -210502.88022200754
  episodes_this_iter: 60
  episodes_total: 7224
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9851322174072266
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017383708618581295
          model: {}
          policy_loss: 0.0023002743255347013
          total_loss: 10.002349853515625
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,113,1764.97,7230190.0,-195286,-189441,-210503,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,113,1764.97,7230190.0,-195286,-189441,-210503,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,113,1764.97,7230190.0,-195286,-189441,-210503,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7294176
  counters:
    num_agent_steps_sampled: 7294176
    num_agent_steps_trained: 7294176
    num_env_steps_sampled: 7294176
    num_env_steps_trained: 7294176
  custom_metrics: {}
  date: 2022-10-14_12-39-58
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189380.6258153018
  episode_reward_mean: -194884.0950353116
  episode_reward_min: -209990.5311433266
  episodes_this_iter: 60
  episodes_total: 7284
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9853625297546387
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0021493907552212477
          model: {}
          policy_loss: -0.0033631750848144293
          total_loss: 9.9967679977417
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,114,1780.98,7294180.0,-194884,-189381,-209991,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,114,1780.98,7294180.0,-194884,-189381,-209991,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,114,1780.98,7294180.0,-194884,-189381,-209991,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7358160
  counters:
    num_agent_steps_sampled: 7358160
    num_agent_steps_trained: 7358160
    num_env_steps_sampled: 7358160
    num_env_steps_trained: 7358160
  custom_metrics: {}
  date: 2022-10-14_12-40-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187420.44675768237
  episode_reward_mean: -193821.831322811
  episode_reward_min: -206920.28893360618
  episodes_this_iter: 72
  episodes_total: 7356
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.991173028945923
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019895019941031933
          model: {}
          policy_loss: 0.0016351460944861174
          total_loss: 10.00173568725586
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,115,1797.66,7358160.0,-193822,-187420,-206920,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,115,1797.66,7358160.0,-193822,-187420,-206920,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,115,1797.66,7358160.0,-193822,-187420,-206920,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7422144
  counters:
    num_agent_steps_sampled: 7422144
    num_agent_steps_trained: 7422144
    num_env_steps_sampled: 7422144
    num_env_steps_trained: 7422144
  custom_metrics: {}
  date: 2022-10-14_12-40-30
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187327.25755563864
  episode_reward_mean: -194131.34823340236
  episode_reward_min: -207058.8497800384
  episodes_this_iter: 60
  episodes_total: 7416
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9937405586242676
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019007906084880233
          model: {}
          policy_loss: 0.0061987098306417465
          total_loss: 10.006279945373535
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,116,1813.2,7422140.0,-194131,-187327,-207059,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,116,1813.2,7422140.0,-194131,-187327,-207059,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,116,1813.2,7422140.0,-194131,-187327,-207059,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7486128
  counters:
    num_agent_steps_sampled: 7486128
    num_agent_steps_trained: 7486128
    num_env_steps_sampled: 7486128
    num_env_steps_trained: 7486128
  custom_metrics: {}
  date: 2022-10-14_12-40-46
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187327.25755563864
  episode_reward_mean: -194720.9072794314
  episode_reward_min: -211403.1997556516
  episodes_this_iter: 60
  episodes_total: 7476
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.991116523742676
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002024339744821191
          model: {}
          policy_loss: 0.002042394829913974
          total_loss: 10.002148628234863
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,117,1829.28,7486130.0,-194721,-187327,-211403,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,117,1829.28,7486130.0,-194721,-187327,-211403,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,117,1829.28,7486130.0,-194721,-187327,-211403,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7550112
  counters:
    num_agent_steps_sampled: 7550112
    num_agent_steps_trained: 7550112
    num_env_steps_sampled: 7550112
    num_env_steps_trained: 7550112
  custom_metrics: {}
  date: 2022-10-14_12-41-02
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188716.02120988155
  episode_reward_mean: -194097.44825495034
  episode_reward_min: -210894.62049953197
  episodes_this_iter: 72
  episodes_total: 7548
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.989914655685425
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001534457434900105
          model: {}
          policy_loss: 0.0067589785903692245
          total_loss: 10.006766319274902
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,118,1844.77,7550110.0,-194097,-188716,-210895,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,118,1844.77,7550110.0,-194097,-188716,-210895,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,118,1844.77,7550110.0,-194097,-188716,-210895,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7614096
  counters:
    num_agent_steps_sampled: 7614096
    num_agent_steps_trained: 7614096
    num_env_steps_sampled: 7614096
    num_env_steps_trained: 7614096
  custom_metrics: {}
  date: 2022-10-14_12-41-18
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189322.3519958116
  episode_reward_mean: -193981.55895725722
  episode_reward_min: -202170.51686375836
  episodes_this_iter: 60
  episodes_total: 7608
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.994584321975708
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0021090295631438494
          model: {}
          policy_loss: 0.001280246302485466
          total_loss: 10.001402854919434
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,119,1860.82,7614100.0,-193982,-189322,-202171,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,119,1860.82,7614100.0,-193982,-189322,-202171,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,119,1860.82,7614100.0,-193982,-189322,-202171,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7678080
  counters:
    num_agent_steps_sampled: 7678080
    num_agent_steps_trained: 7678080
    num_env_steps_sampled: 7678080
    num_env_steps_trained: 7678080
  custom_metrics: {}
  date: 2022-10-14_12-41-33
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188938.74672374036
  episode_reward_mean: -193780.68102693823
  episode_reward_min: -202310.6468856208
  episodes_this_iter: 60
  episodes_total: 7668
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9885711669921875
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0020129969343543053
          model: {}
          policy_loss: 0.0012434907257556915
          total_loss: 10.001347541809082
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,120,1876.12,7678080.0,-193781,-188939,-202311,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,120,1876.12,7678080.0,-193781,-188939,-202311,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,120,1876.12,7678080.0,-193781,-188939,-202311,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7742064
  counters:
    num_agent_steps_sampled: 7742064
    num_agent_steps_trained: 7742064
    num_env_steps_sampled: 7742064
    num_env_steps_trained: 7742064
  custom_metrics: {}
  date: 2022-10-14_12-41-50
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187244.36006962426
  episode_reward_mean: -194498.5853613473
  episode_reward_min: -219718.75404474433
  episodes_this_iter: 72
  episodes_total: 7740
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.988121271133423
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019216041546314955
          model: {}
          policy_loss: 0.006630155723541975
          total_loss: 10.006715774536133
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,121,1892.69,7742060.0,-194499,-187244,-219719,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,121,1892.69,7742060.0,-194499,-187244,-219719,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,121,1892.69,7742060.0,-194499,-187244,-219719,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7806048
  counters:
    num_agent_steps_sampled: 7806048
    num_agent_steps_trained: 7806048
    num_env_steps_sampled: 7806048
    num_env_steps_trained: 7806048
  custom_metrics: {}
  date: 2022-10-14_12-42-06
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187244.36006962426
  episode_reward_mean: -193821.57982932363
  episode_reward_min: -202620.16189989937
  episodes_this_iter: 60
  episodes_total: 7800
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.987783670425415
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019863799680024385
          model: {}
          policy_loss: 0.0035051354207098484
          total_loss: 10.0036039352417
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,122,1908.38,7806050.0,-193822,-187244,-202620,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,122,1908.38,7806050.0,-193822,-187244,-202620,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,122,1908.38,7806050.0,-193822,-187244,-202620,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7870032
  counters:
    num_agent_steps_sampled: 7870032
    num_agent_steps_trained: 7870032
    num_env_steps_sampled: 7870032
    num_env_steps_trained: 7870032
  custom_metrics: {}
  date: 2022-10-14_12-42-22
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189418.47123512762
  episode_reward_mean: -193907.4438983875
  episode_reward_min: -201539.5768286346
  episodes_this_iter: 60
  episodes_total: 7860
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.989750623703003
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0023515475913882256
          model: {}
          policy_loss: -0.0004245741292834282
          total_loss: 9.999746322631836
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,123,1924.39,7870030.0,-193907,-189418,-201540,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,123,1924.39,7870030.0,-193907,-189418,-201540,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,123,1924.39,7870030.0,-193907,-189418,-201540,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7934016
  counters:
    num_agent_steps_sampled: 7934016
    num_agent_steps_trained: 7934016
    num_env_steps_sampled: 7934016
    num_env_steps_trained: 7934016
  custom_metrics: {}
  date: 2022-10-14_12-42-37
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189965.29257176977
  episode_reward_mean: -194352.85060781057
  episode_reward_min: -209680.31359992968
  episodes_this_iter: 72
  episodes_total: 7932
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.985276937484741
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0023891045711934566
          model: {}
          policy_loss: 0.005140863824635744
          total_loss: 10.00532054901123
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,124,1939.78,7934020.0,-194353,-189965,-209680,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,124,1939.78,7934020.0,-194353,-189965,-209680,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,124,1939.78,7934020.0,-194353,-189965,-209680,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 7998000
  counters:
    num_agent_steps_sampled: 7998000
    num_agent_steps_trained: 7998000
    num_env_steps_sampled: 7998000
    num_env_steps_trained: 7998000
  custom_metrics: {}
  date: 2022-10-14_12-42-53
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189678.7560356628
  episode_reward_mean: -194014.93108476623
  episode_reward_min: -209680.31359992968
  episodes_this_iter: 60
  episodes_total: 7992
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9803988933563232
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00228344788774848
          model: {}
          policy_loss: 0.003052324056625366
          total_loss: 10.003210067749023
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,125,1955.79,7998000.0,-194015,-189679,-209680,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,125,1955.79,7998000.0,-194015,-189679,-209680,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,125,1955.79,7998000.0,-194015,-189679,-209680,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8061984
  counters:
    num_agent_steps_sampled: 8061984
    num_agent_steps_trained: 8061984
    num_env_steps_sampled: 8061984
    num_env_steps_trained: 8061984
  custom_metrics: {}
  date: 2022-10-14_12-43-09
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188160.24334307518
  episode_reward_mean: -193342.29639524536
  episode_reward_min: -204898.39382509515
  episodes_this_iter: 60
  episodes_total: 8052
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9759788513183594
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0022047567181289196
          model: {}
          policy_loss: 0.0016015078872442245
          total_loss: 10.001745223999023
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,126,1971.2,8061980.0,-193342,-188160,-204898,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,126,1971.2,8061980.0,-193342,-188160,-204898,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,126,1971.2,8061980.0,-193342,-188160,-204898,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8125968
  counters:
    num_agent_steps_sampled: 8125968
    num_agent_steps_trained: 8125968
    num_env_steps_sampled: 8125968
    num_env_steps_trained: 8125968
  custom_metrics: {}
  date: 2022-10-14_12-43-25
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188673.34245822817
  episode_reward_mean: -194044.36271872278
  episode_reward_min: -208702.47764539346
  episodes_this_iter: 72
  episodes_total: 8124
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9791786670684814
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0021340397652238607
          model: {}
          policy_loss: 0.0054238587617874146
          total_loss: 10.005553245544434
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,127,1987.24,8125970.0,-194044,-188673,-208702,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,127,1987.24,8125970.0,-194044,-188673,-208702,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,127,1987.24,8125970.0,-194044,-188673,-208702,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8189952
  counters:
    num_agent_steps_sampled: 8189952
    num_agent_steps_trained: 8189952
    num_env_steps_sampled: 8189952
    num_env_steps_trained: 8189952
  custom_metrics: {}
  date: 2022-10-14_12-43-41
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188673.34245822817
  episode_reward_mean: -194029.70259250642
  episode_reward_min: -214342.44287236602
  episodes_this_iter: 60
  episodes_total: 8184
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.968961238861084
          entropy_coeff: 9.999999747378752e-05
          kl: 0.00192514737136662
          model: {}
          policy_loss: 0.00380867812782526
          total_loss: 10.003896713256836
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,128,2003.02,8189950.0,-194030,-188673,-214342,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,128,2003.02,8189950.0,-194030,-188673,-214342,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,128,2003.02,8189950.0,-194030,-188673,-214342,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8253936
  counters:
    num_agent_steps_sampled: 8253936
    num_agent_steps_trained: 8253936
    num_env_steps_sampled: 8253936
    num_env_steps_trained: 8253936
  custom_metrics: {}
  date: 2022-10-14_12-43-56
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188776.12023703955
  episode_reward_mean: -194042.09858714946
  episode_reward_min: -218101.75207521368
  episodes_this_iter: 60
  episodes_total: 8244
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9602746963500977
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002211654093116522
          model: {}
          policy_loss: 0.002994619309902191
          total_loss: 10.003141403198242
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,129,2018.62,8253940.0,-194042,-188776,-218102,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,129,2018.62,8253940.0,-194042,-188776,-218102,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,129,2018.62,8253940.0,-194042,-188776,-218102,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8317920
  counters:
    num_agent_steps_sampled: 8317920
    num_agent_steps_trained: 8317920
    num_env_steps_sampled: 8317920
    num_env_steps_trained: 8317920
  custom_metrics: {}
  date: 2022-10-14_12-44-12
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187884.93637135488
  episode_reward_mean: -193257.23516601173
  episode_reward_min: -218101.75207521368
  episodes_this_iter: 72
  episodes_total: 8316
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.964703321456909
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001890482846647501
          model: {}
          policy_loss: 0.005353663116693497
          total_loss: 10.0054349899292
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,130,2034.73,8317920.0,-193257,-187885,-218102,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,130,2034.73,8317920.0,-193257,-187885,-218102,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8381904
  counters:
    num_agent_steps_sampled: 8381904
    num_agent_steps_trained: 8381904
    num_env_steps_sampled: 8381904
    num_env_steps_trained: 8381904
  custom_metrics: {}
  date: 2022-10-14_12-44-28
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187884.93637135488
  episode_reward_mean: -192500.41192648245
  episode_reward_min: -201662.3335482013
  episodes_this_iter: 60
  episodes_total: 8376
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.961832284927368
          entropy_coeff: 9.999999747378752e-05
          kl: 0.001963771879673004
          model: {}
          policy_loss: 0.003964942879974842
          total_loss: 10.004060745239258
          vf_explained_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,131,2050.11,8381900.0,-192500,-187885,-201662,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,131,2050.11,8381900.0,-192500,-187885,-201662,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,131,2050.11,8381900.0,-192500,-187885,-201662,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8445888
  counters:
    num_agent_steps_sampled: 8445888
    num_agent_steps_trained: 8445888
    num_env_steps_sampled: 8445888
    num_env_steps_trained: 8445888
  custom_metrics: {}
  date: 2022-10-14_12-44-43
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188460.9563162868
  episode_reward_mean: -193754.23423643806
  episode_reward_min: -217789.99057153985
  episodes_this_iter: 60
  episodes_total: 8436
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.963819742202759
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0022496571764349937
          model: {}
          policy_loss: 0.0014378591440618038
          total_loss: 10.001591682434082
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,132,2065.82,8445890.0,-193754,-188461,-217790,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,132,2065.82,8445890.0,-193754,-188461,-217790,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,132,2065.82,8445890.0,-193754,-188461,-217790,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8509872
  counters:
    num_agent_steps_sampled: 8509872
    num_agent_steps_trained: 8509872
    num_env_steps_sampled: 8509872
    num_env_steps_trained: 8509872
  custom_metrics: {}
  date: 2022-10-14_12-44-59
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188597.53809183562
  episode_reward_mean: -193189.17461916953
  episode_reward_min: -203931.0980834958
  episodes_this_iter: 72
  episodes_total: 8508
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9517805576324463
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0025149488355964422
          model: {}
          policy_loss: 0.004018332343548536
          total_loss: 10.004226684570312
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,133,2081.21,8509870.0,-193189,-188598,-203931,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,133,2081.21,8509870.0,-193189,-188598,-203931,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,133,2081.21,8509870.0,-193189,-188598,-203931,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8573856
  counters:
    num_agent_steps_sampled: 8573856
    num_agent_steps_trained: 8573856
    num_env_steps_sampled: 8573856
    num_env_steps_trained: 8573856
  custom_metrics: {}
  date: 2022-10-14_12-45-15
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188597.53809183562
  episode_reward_mean: -193737.4670043215
  episode_reward_min: -217446.60803897856
  episodes_this_iter: 60
  episodes_total: 8568
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.942706346511841
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019276465754956007
          model: {}
          policy_loss: 0.003338882001116872
          total_loss: 10.003429412841797
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,134,2096.83,8573860.0,-193737,-188598,-217447,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,134,2096.83,8573860.0,-193737,-188598,-217447,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,134,2096.83,8573860.0,-193737,-188598,-217447,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8637840
  counters:
    num_agent_steps_sampled: 8637840
    num_agent_steps_trained: 8637840
    num_env_steps_sampled: 8637840
    num_env_steps_trained: 8637840
  custom_metrics: {}
  date: 2022-10-14_12-45-30
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188701.28213261205
  episode_reward_mean: -193582.69630342178
  episode_reward_min: -217446.60803897856
  episodes_this_iter: 60
  episodes_total: 8628
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9446816444396973
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002011392032727599
          model: {}
          policy_loss: -0.002504633506760001
          total_loss: 9.997602462768555
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,135,2112.07,8637840.0,-193583,-188701,-217447,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,135,2112.07,8637840.0,-193583,-188701,-217447,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,135,2112.07,8637840.0,-193583,-188701,-217447,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8701824
  counters:
    num_agent_steps_sampled: 8701824
    num_agent_steps_trained: 8701824
    num_env_steps_sampled: 8701824
    num_env_steps_trained: 8701824
  custom_metrics: {}
  date: 2022-10-14_12-45-45
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188171.7733783317
  episode_reward_mean: -194275.0021539623
  episode_reward_min: -212194.44421172366
  episodes_this_iter: 72
  episodes_total: 8700
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9309675693511963
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0023203380405902863
          model: {}
          policy_loss: 0.0012992977863177657
          total_loss: 10.001470565795898
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,136,2127.7,8701820.0,-194275,-188172,-212194,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,136,2127.7,8701820.0,-194275,-188172,-212194,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,136,2127.7,8701820.0,-194275,-188172,-212194,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8765808
  counters:
    num_agent_steps_sampled: 8765808
    num_agent_steps_trained: 8765808
    num_env_steps_sampled: 8765808
    num_env_steps_trained: 8765808
  custom_metrics: {}
  date: 2022-10-14_12-46-01
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188171.7733783317
  episode_reward_mean: -193986.69996717863
  episode_reward_min: -212194.44421172366
  episodes_this_iter: 60
  episodes_total: 8760
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.927401304244995
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0019268888281658292
          model: {}
          policy_loss: 0.007210954092442989
          total_loss: 10.007303237915039
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,137,2142.88,8765810.0,-193987,-188172,-212194,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,137,2142.88,8765810.0,-193987,-188172,-212194,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,137,2142.88,8765810.0,-193987,-188172,-212194,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8829792
  counters:
    num_agent_steps_sampled: 8829792
    num_agent_steps_trained: 8829792
    num_env_steps_sampled: 8829792
    num_env_steps_trained: 8829792
  custom_metrics: {}
  date: 2022-10-14_12-46-16
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188306.12249343802
  episode_reward_mean: -193475.20540238862
  episode_reward_min: -213626.7281106848
  episodes_this_iter: 60
  episodes_total: 8820
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9353628158569336
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0018989498494192958
          model: {}
          policy_loss: 0.0035921090748161077
          total_loss: 10.003679275512695
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,138,2158.59,8829790.0,-193475,-188306,-213627,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,138,2158.59,8829790.0,-193475,-188306,-213627,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,138,2158.59,8829790.0,-193475,-188306,-213627,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8893776
  counters:
    num_agent_steps_sampled: 8893776
    num_agent_steps_trained: 8893776
    num_env_steps_sampled: 8893776
    num_env_steps_trained: 8893776
  custom_metrics: {}
  date: 2022-10-14_12-46-33
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188335.84933179902
  episode_reward_mean: -193122.84968029297
  episode_reward_min: -211474.26867835879
  episodes_this_iter: 72
  episodes_total: 8892
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.939621686935425
          entropy_coeff: 9.999999747378752e-05
          kl: 0.002108360407873988
          model: {}
          policy_loss: -0.0006705960258841515
          total_loss: 9.999457359313965
          vf_explaine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,139,2175.02,8893780.0,-193123,-188336,-211474,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,139,2175.02,8893780.0,-193123,-188336,-211474,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,139,2175.02,8893780.0,-193123,-188336,-211474,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 8957760
  counters:
    num_agent_steps_sampled: 8957760
    num_agent_steps_trained: 8957760
    num_env_steps_sampled: 8957760
    num_env_steps_trained: 8957760
  custom_metrics: {}
  date: 2022-10-14_12-46-48
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188858.4824345231
  episode_reward_mean: -193649.8372005031
  episode_reward_min: -215769.1343579096
  episodes_this_iter: 60
  episodes_total: 8952
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9366681575775146
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0016847060760483146
          model: {}
          policy_loss: 0.00583272147923708
          total_loss: 10.005876541137695
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,140,2190.22,8957760.0,-193650,-188858,-215769,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,140,2190.22,8957760.0,-193650,-188858,-215769,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,140,2190.22,8957760.0,-193650,-188858,-215769,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 9021744
  counters:
    num_agent_steps_sampled: 9021744
    num_agent_steps_trained: 9021744
    num_env_steps_sampled: 9021744
    num_env_steps_trained: 9021744
  custom_metrics: {}
  date: 2022-10-14_12-47-04
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188331.03618033737
  episode_reward_mean: -195424.59945926006
  episode_reward_min: -244725.71339162154
  episodes_this_iter: 60
  episodes_total: 9012
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9382898807525635
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0018547696527093649
          model: {}
          policy_loss: 0.0017099472461268306
          total_loss: 10.001787185668945
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,141,2206.29,9021740.0,-195425,-188331,-244726,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,141,2206.29,9021740.0,-195425,-188331,-244726,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,141,2206.29,9021740.0,-195425,-188331,-244726,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 9085728
  counters:
    num_agent_steps_sampled: 9085728
    num_agent_steps_trained: 9085728
    num_env_steps_sampled: 9085728
    num_env_steps_trained: 9085728
  custom_metrics: {}
  date: 2022-10-14_12-47-20
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -189119.2249950669
  episode_reward_mean: -194226.18683386172
  episode_reward_min: -244725.71339162154
  episodes_this_iter: 72
  episodes_total: 9084
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.939167022705078
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0016248460160568357
          model: {}
          policy_loss: 0.0063977325335145
          total_loss: 10.006428718566895
          vf_explained_v

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,142,2221.91,9085730.0,-194226,-189119,-244726,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,142,2221.91,9085730.0,-194226,-189119,-244726,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,142,2221.91,9085730.0,-194226,-189119,-244726,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 9149712
  counters:
    num_agent_steps_sampled: 9149712
    num_agent_steps_trained: 9149712
    num_env_steps_sampled: 9149712
    num_env_steps_trained: 9149712
  custom_metrics: {}
  date: 2022-10-14_12-47-36
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188362.73100947018
  episode_reward_mean: -194395.79694661553
  episode_reward_min: -220175.5933046717
  episodes_this_iter: 60
  episodes_total: 9144
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9229722023010254
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0023651549126952887
          model: {}
          policy_loss: 0.0017895549535751343
          total_loss: 10.001970291137695
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,143,2238.02,9149710.0,-194396,-188363,-220176,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,143,2238.02,9149710.0,-194396,-188363,-220176,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 9213696
  counters:
    num_agent_steps_sampled: 9213696
    num_agent_steps_trained: 9213696
    num_env_steps_sampled: 9213696
    num_env_steps_trained: 9213696
  custom_metrics: {}
  date: 2022-10-14_12-47-51
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188973.80251063875
  episode_reward_mean: -194945.18636626416
  episode_reward_min: -224474.71976204612
  episodes_this_iter: 60
  episodes_total: 9204
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9296135902404785
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017119917320087552
          model: {}
          policy_loss: 0.0021296690683811903
          total_loss: 10.002180099487305
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,144,2253.17,9213700.0,-194945,-188974,-224475,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,144,2253.17,9213700.0,-194945,-188974,-224475,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,144,2253.17,9213700.0,-194945,-188974,-224475,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,144,2253.17,9213700.0,-194945,-188974,-224475,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 9277680
  counters:
    num_agent_steps_sampled: 9277680
    num_agent_steps_trained: 9277680
    num_env_steps_sampled: 9277680
    num_env_steps_trained: 9277680
  custom_metrics: {}
  date: 2022-10-14_12-48-07
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -186738.2569470033
  episode_reward_mean: -195122.0185361197
  episode_reward_min: -240778.7402475682
  episodes_this_iter: 72
  episodes_total: 9276
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9274704456329346
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0022702349815517664
          model: {}
          policy_loss: 0.0032234375830739737
          total_loss: 10.003384590148926
          vf_explained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,145,2268.88,9277680.0,-195122,-186738,-240779,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,145,2268.88,9277680.0,-195122,-186738,-240779,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,145,2268.88,9277680.0,-195122,-186738,-240779,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 9341664
  counters:
    num_agent_steps_sampled: 9341664
    num_agent_steps_trained: 9341664
    num_env_steps_sampled: 9341664
    num_env_steps_trained: 9341664
  custom_metrics: {}
  date: 2022-10-14_12-48-23
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -187296.62922108348
  episode_reward_mean: -194944.20631054798
  episode_reward_min: -217968.84418870584
  episodes_this_iter: 60
  episodes_total: 9336
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.9181976318359375
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0017985646845772862
          model: {}
          policy_loss: 0.0003789197653532028
          total_loss: 10.000447273254395
          vf_explai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,146,2285.21,9341660.0,-194944,-187297,-217969,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,146,2285.21,9341660.0,-194944,-187297,-217969,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,146,2285.21,9341660.0,-194944,-187297,-217969,1000


Result for PPOTrainer_IBGym-v1_5b58d_00000:
  agent_timesteps_total: 9405648
  counters:
    num_agent_steps_sampled: 9405648
    num_agent_steps_trained: 9405648
    num_env_steps_sampled: 9405648
    num_env_steps_trained: 9405648
  custom_metrics: {}
  date: 2022-10-14_12-48-39
  done: false
  episode_len_mean: 1000.0
  episode_media: {}
  episode_reward_max: -188126.03252839172
  episode_reward_mean: -195572.46924091992
  episode_reward_min: -233796.34798798428
  episodes_this_iter: 60
  episodes_total: 9396
  experiment_id: 60b1151648774b57ab89dde1fea079b7
  hostname: hamza-Legion-5-15ACH6H
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 2.9999999242136255e-05
          entropy: 2.916558027267456
          entropy_coeff: 9.999999747378752e-05
          kl: 0.0016570857260376215
          model: {}
          policy_loss: 0.0030780716333538294
          total_loss: 10.003118515014648
          vf_explain

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,147,2300.72,9405650.0,-195572,-188126,-233796,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,147,2300.72,9405650.0,-195572,-188126,-233796,1000




Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,147,2300.72,9405650.0,-195572,-188126,-233796,1000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPOTrainer_IBGym-v1_5b58d_00000,RUNNING,192.168.0.185:344251,147,2300.72,9405650.0,-195572,-188126,-233796,1000


2022-10-14 12:48:57,155	ERROR tune.py:743 -- Trials did not complete: [PPOTrainer_IBGym-v1_5b58d_00000]
2022-10-14 12:48:57,156	INFO tune.py:747 -- Total run time: 2339.42 seconds (2337.31 seconds for the tuning loop).


In [6]:



# Export the trained policy: locate the most recent checkpoint of the Tune
# run, rebuild the policy wrapper from it, and serialize it to disk.

# `get_last_checkpoint()` can return None when no checkpoint is available
# (the training log above ends with "Trials did not complete"), so fail with
# an explicit message instead of an AttributeError on `None._local_path`.
last_checkpoint_obj = results.get_last_checkpoint()
if last_checkpoint_obj is None:
    raise RuntimeError(
        "No checkpoint found for the training run; "
        "cannot build LSTMPPOPolicy without a checkpoint."
    )

# NOTE(review): `_local_path` is a private attribute of the checkpoint object
# and may change between Ray versions — confirm against the installed Ray
# release or switch to a public accessor.
last_checkpoint = last_checkpoint_obj._local_path

# NOTE: this mutates the shared `config` dict in place (it was defined with
# "num_workers": 12). Re-run the configuration cell before launching another
# training run, otherwise training would pick up the reduced worker count.
# Presumably a single worker is enough for restoring/exporting — TODO confirm.
config["num_workers"] = 1

lstm_ppo_policy = LSTMPPOPolicy(config=config, checkpoint_path=last_checkpoint)
save_path = 'ppo/lstm_ppo_policy.pkl'
lstm_ppo_policy.save(save_path)

In [7]:
# Display the local path of the run's most recent checkpoint (same lookup as
# the export cell) so the exported policy can be traced back to its source.
# NOTE(review): `_local_path` is a private attribute — confirm it is stable
# for the installed Ray version.
results.get_last_checkpoint()._local_path

'/home/hamza/PycharmProjects/StateCompression/tmp/ray_exp_logs/industrial_benchmark/PPOTrainer_IBGym-v1_5b58d_00000_0_2022-10-14_12-09-57/checkpoint_000145/checkpoint-145'