# Stable baselines Multi agent


In [2]:
# Imports

import os
import gym
import slimevolleygym
from slimevolleygym import SurvivalRewardEnv

from stable_baselines.ppo1 import PPO1
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import logger
from stable_baselines.common.callbacks import EvalCallback

In [9]:
# setting up environemnt
env = gym.make("SlimeVolley-v0")

# reset the environment
obs = env.reset()
done= False
total_reward = 0

while not done:
    
    # taking random actions
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    total_reward += reward
    env.render()

print("score:", total_reward)

score: -5


# PPO 
### Proximal Policy Optimization
It is a Reinforcement Learning algorithm built by OpenAI gym

In [12]:
NUM_TIMESTEPS = int(2e7)
SEED = 721
EVAL_FREQ = 250000
EVAL_EPISODES = 100
LOGDIR = "ppo1" # moved to zoo afterwards.

logger.configure(folder=LOGDIR)

env = gym.make("SlimeVolley-v0")
env.seed(SEED)

# take mujoco hyperparams (but doubled timesteps_per_actorbatch to cover more steps.)
model = PPO1(MlpPolicy, env, timesteps_per_actorbatch=4096, clip_param=0.2, entcoeff=0.0, optim_epochs=10,
                 optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99, lam=0.95, schedule='linear', verbose=2)

eval_callback = EvalCallback(env, best_model_save_path=LOGDIR, log_path=LOGDIR, eval_freq=EVAL_FREQ, n_eval_episodes=EVAL_EPISODES)

model.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)

Logging to ppo1
********** Iteration 0 ************


  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     8.37e-06 |       0.00000 |       0.03886 |       0.00022 |       2.07924
     -0.00232 |       0.00000 |       0.02201 |       0.00143 |       2.07808
     -0.00485 |       0.00000 |       0.01930 |       0.00386 |       2.07569
     -0.00619 |       0.00000 |       0.01770 |       0.00603 |       2.07357
     -0.00693 |       0.00000 |       0.01680 |       0.00767 |       2.07196
     -0.00748 |       0.00000 |       0.01606 |       0.00738 |       2.07225
     -0.00771 |       0.00000 |       0.01552 |       0.00744 |       2.07219
     -0.00819 |       0.00000 |       0.01511 |       0.00810 |       2.07155
     -0.00825 |       0.00000 |       0.01494 |       0.00773 |       2.07191
     -0.00859 |       0.00000 |       0.01455 |       0.00885 |       2.07081
Evaluating losses...
     -0.00962 |       0.00000 |       0.01430 |       0.00845 |       2.07121
-----------------------------

     -0.00889 |       0.00000 |       0.01746 |       0.00729 |       2.00806
     -0.00900 |       0.00000 |       0.01759 |       0.00831 |       2.00797
Evaluating losses...
     -0.01069 |       0.00000 |       0.01721 |       0.00780 |       2.00995
----------------------------------
| EpLenMean       | 544          |
| EpRewMean       | -4.89        |
| EpThisIter      | 8            |
| EpisodesSoFar   | 45           |
| TimeElapsed     | 20.4         |
| TimestepsSoFar  | 24576        |
| ev_tdlam_before | 0.82         |
| loss_ent        | 2.0099514    |
| loss_kl         | 0.007797507  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010685639 |
| loss_vf_loss    | 0.017214114  |
----------------------------------
********** Iteration 6 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.79e-05 |       0.00000 |       0.01585 |       0.00070 |       2.00622
     -0.00322 |       0.00000 |       0.01505 |  

********** Iteration 11 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00078 |       0.00000 |       0.02128 |       0.00071 |       1.97368
     -0.00506 |       0.00000 |       0.01985 |       0.00443 |       1.97102
     -0.00638 |       0.00000 |       0.01934 |       0.00748 |       1.96275
     -0.00754 |       0.00000 |       0.01866 |       0.00723 |       1.96379
     -0.00851 |       0.00000 |       0.01832 |       0.00719 |       1.96154
     -0.00861 |       0.00000 |       0.01812 |       0.00770 |       1.96057
     -0.00903 |       0.00000 |       0.01776 |       0.00853 |       1.95518
     -0.00978 |       0.00000 |       0.01784 |       0.00716 |       1.95999
     -0.01028 |       0.00000 |       0.01745 |       0.00849 |       1.95921
     -0.01053 |       0.00000 |       0.01713 |       0.00707 |       1.96081
Evaluating losses...
     -0.01227 |       0.00000 |       0.01674 |       0.00772 |       

     -0.00839 |       0.00000 |       0.01024 |       0.00729 |       1.94321
     -0.00846 |       0.00000 |       0.01004 |       0.00619 |       1.94453
     -0.00946 |       0.00000 |       0.01006 |       0.00721 |       1.94501
Evaluating losses...
     -0.01092 |       0.00000 |       0.00976 |       0.00793 |       1.94325
----------------------------------
| EpLenMean       | 578          |
| EpRewMean       | -4.89        |
| EpThisIter      | 7            |
| EpisodesSoFar   | 121          |
| TimeElapsed     | 56.5         |
| TimestepsSoFar  | 69632        |
| ev_tdlam_before | 0.881        |
| loss_ent        | 1.9432546    |
| loss_kl         | 0.007934336  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010917269 |
| loss_vf_loss    | 0.009756893  |
----------------------------------
********** Iteration 17 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00027 |       0.00000 |       0.01167 | 

********** Iteration 22 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00144 |       0.00000 |       0.01758 |       0.00114 |       1.88309
     -0.00780 |       0.00000 |       0.01700 |       0.00396 |       1.87558
     -0.00956 |       0.00000 |       0.01649 |       0.00646 |       1.86226
     -0.01072 |       0.00000 |       0.01620 |       0.00676 |       1.85946
     -0.01218 |       0.00000 |       0.01595 |       0.00782 |       1.85477
     -0.01254 |       0.00000 |       0.01566 |       0.00821 |       1.85131
     -0.01272 |       0.00000 |       0.01544 |       0.00854 |       1.85093
     -0.01372 |       0.00000 |       0.01543 |       0.00893 |       1.84674
     -0.01413 |       0.00000 |       0.01503 |       0.00842 |       1.84921
     -0.01484 |       0.00000 |       0.01500 |       0.00877 |       1.84744
Evaluating losses...
     -0.01658 |       0.00000 |       0.01470 |       0.00886 |       

     -0.01231 |       0.00000 |       0.01266 |       0.00781 |       1.75895
     -0.01243 |       0.00000 |       0.01257 |       0.00763 |       1.76262
Evaluating losses...
     -0.01494 |       0.00000 |       0.01219 |       0.00703 |       1.76260
----------------------------------
| EpLenMean       | 616          |
| EpRewMean       | -4.96        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 193          |
| TimeElapsed     | 91           |
| TimestepsSoFar  | 114688       |
| ev_tdlam_before | 0.826        |
| loss_ent        | 1.7626021    |
| loss_kl         | 0.007032189  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014941731 |
| loss_vf_loss    | 0.0121871345 |
----------------------------------
********** Iteration 28 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00081 |       0.00000 |       0.01545 |       0.00192 |       1.76567
     -0.00565 |       0.00000 |       0.01460 | 

********** Iteration 33 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00169 |       0.00000 |       0.01536 |       0.00166 |       1.69409
     -0.00553 |       0.00000 |       0.01418 |       0.00400 |       1.69995
     -0.00701 |       0.00000 |       0.01379 |       0.00399 |       1.69305
     -0.00857 |       0.00000 |       0.01346 |       0.00565 |       1.69187
     -0.00968 |       0.00000 |       0.01326 |       0.00551 |       1.68856
     -0.01094 |       0.00000 |       0.01312 |       0.00559 |       1.68854
     -0.01202 |       0.00000 |       0.01295 |       0.00558 |       1.68795
     -0.01285 |       0.00000 |       0.01281 |       0.00637 |       1.68119
     -0.01417 |       0.00000 |       0.01275 |       0.00597 |       1.68612
     -0.01522 |       0.00000 |       0.01261 |       0.00673 |       1.68684
Evaluating losses...
     -0.01713 |       0.00000 |       0.01313 |       0.00634 |       

     -0.01040 |       0.00000 |       0.01395 |       0.00571 |       1.65603
     -0.01102 |       0.00000 |       0.01370 |       0.00589 |       1.65723
Evaluating losses...
     -0.01307 |       0.00000 |       0.01358 |       0.00575 |       1.65184
----------------------------------
| EpLenMean       | 636          |
| EpRewMean       | -4.86        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 263          |
| TimeElapsed     | 124          |
| TimestepsSoFar  | 159744       |
| ev_tdlam_before | 0.823        |
| loss_ent        | 1.6518399    |
| loss_kl         | 0.0057501863 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01307351  |
| loss_vf_loss    | 0.013581318  |
----------------------------------
********** Iteration 39 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00080 |       0.00000 |       0.02028 |       0.00116 |       1.64768
     -0.00434 |       0.00000 |       0.01811 | 

********** Iteration 44 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00041 |       0.00000 |       0.01549 |       0.00140 |       1.62646
     -0.00570 |       0.00000 |       0.01433 |       0.00417 |       1.62070
     -0.00819 |       0.00000 |       0.01376 |       0.00582 |       1.61382
     -0.00920 |       0.00000 |       0.01336 |       0.00576 |       1.61587
     -0.00961 |       0.00000 |       0.01305 |       0.00617 |       1.61488
     -0.01140 |       0.00000 |       0.01299 |       0.00652 |       1.61101
     -0.01167 |       0.00000 |       0.01288 |       0.00709 |       1.61690
     -0.01303 |       0.00000 |       0.01287 |       0.00659 |       1.61668
     -0.01368 |       0.00000 |       0.01253 |       0.00807 |       1.61202
     -0.01401 |       0.00000 |       0.01238 |       0.00716 |       1.61167
Evaluating losses...
     -0.01638 |       0.00000 |       0.01208 |       0.00790 |       

     -0.01320 |       0.00000 |       0.01237 |       0.00601 |       1.51604
     -0.01301 |       0.00000 |       0.01220 |       0.00559 |       1.51444
Evaluating losses...
     -0.01626 |       0.00000 |       0.01215 |       0.00626 |       1.51246
----------------------------------
| EpLenMean       | 625          |
| EpRewMean       | -4.88        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 336          |
| TimeElapsed     | 159          |
| TimestepsSoFar  | 204800       |
| ev_tdlam_before | 0.833        |
| loss_ent        | 1.5124613    |
| loss_kl         | 0.0062556653 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01625692  |
| loss_vf_loss    | 0.012146282  |
----------------------------------
********** Iteration 50 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00097 |       0.00000 |       0.01733 |       0.00142 |       1.52332
     -0.00449 |       0.00000 |       0.01575 | 

********** Iteration 55 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00029 |       0.00000 |       0.01688 |       0.00090 |       1.45579
     -0.00672 |       0.00000 |       0.01497 |       0.00485 |       1.44713
     -0.00810 |       0.00000 |       0.01416 |       0.00696 |       1.43284
     -0.00966 |       0.00000 |       0.01361 |       0.00663 |       1.44277
     -0.01092 |       0.00000 |       0.01325 |       0.00754 |       1.44015
     -0.01150 |       0.00000 |       0.01312 |       0.00653 |       1.43621
     -0.01269 |       0.00000 |       0.01283 |       0.00770 |       1.44277
     -0.01351 |       0.00000 |       0.01259 |       0.00717 |       1.44028
     -0.01462 |       0.00000 |       0.01250 |       0.00702 |       1.43569
     -0.01505 |       0.00000 |       0.01236 |       0.00754 |       1.43916
Evaluating losses...
     -0.01742 |       0.00000 |       0.01196 |       0.00797 |       

     -0.01271 |       0.00000 |       0.01458 |       0.00644 |       1.38024
     -0.01335 |       0.00000 |       0.01427 |       0.00617 |       1.38080
Evaluating losses...
     -0.01565 |       0.00000 |       0.01390 |       0.00695 |       1.37887
---------------------------------
| EpLenMean       | 619         |
| EpRewMean       | -4.83       |
| EpThisIter      | 6           |
| EpisodesSoFar   | 409         |
| TimeElapsed     | 191         |
| TimestepsSoFar  | 249856      |
| ev_tdlam_before | 0.739       |
| loss_ent        | 1.3788682   |
| loss_kl         | 0.006953921 |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.01565308 |
| loss_vf_loss    | 0.013899798 |
---------------------------------
********** Iteration 61 ************
Eval num_timesteps=249856, episode_reward=-4.86 +/- 0.37
Episode length: 596.76 +/- 112.53
New best mean reward!
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00052 |       0.000

********** Iteration 66 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00078 |       0.00000 |       0.02136 |       0.00102 |       1.42483
     -0.00463 |       0.00000 |       0.01892 |       0.00355 |       1.43879
     -0.00654 |       0.00000 |       0.01739 |       0.00549 |       1.43627
     -0.00908 |       0.00000 |       0.01694 |       0.00576 |       1.43591
     -0.00948 |       0.00000 |       0.01639 |       0.00564 |       1.43316
     -0.01065 |       0.00000 |       0.01575 |       0.00556 |       1.43050
     -0.01228 |       0.00000 |       0.01544 |       0.00629 |       1.43452
     -0.01328 |       0.00000 |       0.01525 |       0.00646 |       1.42298
     -0.01379 |       0.00000 |       0.01489 |       0.00655 |       1.42537
     -0.01476 |       0.00000 |       0.01463 |       0.00650 |       1.42585
Evaluating losses...
     -0.01710 |       0.00000 |       0.01427 |       0.00690 |       

     -0.01443 |       0.00000 |       0.01123 |       0.00601 |       1.43707
     -0.01559 |       0.00000 |       0.01094 |       0.00641 |       1.43644
Evaluating losses...
     -0.01791 |       0.00000 |       0.01052 |       0.00592 |       1.43095
----------------------------------
| EpLenMean       | 659          |
| EpRewMean       | -4.82        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 477          |
| TimeElapsed     | 257          |
| TimestepsSoFar  | 294912       |
| ev_tdlam_before | 0.859        |
| loss_ent        | 1.4309452    |
| loss_kl         | 0.00591971   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.017912945 |
| loss_vf_loss    | 0.010524901  |
----------------------------------
********** Iteration 72 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00025 |       0.00000 |       0.01528 |       0.00131 |       1.39303
     -0.00675 |       0.00000 |       0.01338 | 

********** Iteration 77 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00086 |       0.00000 |       0.01417 |       0.00218 |       1.36573
     -0.00473 |       0.00000 |       0.01279 |       0.00465 |       1.36320
     -0.00707 |       0.00000 |       0.01227 |       0.00711 |       1.36666
     -0.00861 |       0.00000 |       0.01197 |       0.00550 |       1.36150
     -0.01009 |       0.00000 |       0.01196 |       0.00564 |       1.35819
     -0.01085 |       0.00000 |       0.01152 |       0.00667 |       1.36302
     -0.01250 |       0.00000 |       0.01157 |       0.00696 |       1.36282
     -0.01360 |       0.00000 |       0.01142 |       0.00612 |       1.36061
     -0.01360 |       0.00000 |       0.01138 |       0.00720 |       1.36755
     -0.01545 |       0.00000 |       0.01109 |       0.00750 |       1.36098
Evaluating losses...
     -0.01727 |       0.00000 |       0.01071 |       0.00818 |       

     -0.01412 |       0.00000 |       0.01632 |       0.00559 |       1.34134
     -0.01547 |       0.00000 |       0.01606 |       0.00564 |       1.34364
Evaluating losses...
     -0.01792 |       0.00000 |       0.01573 |       0.00656 |       1.34178
----------------------------------
| EpLenMean       | 695          |
| EpRewMean       | -4.8         |
| EpThisIter      | 5            |
| EpisodesSoFar   | 541          |
| TimeElapsed     | 293          |
| TimestepsSoFar  | 339968       |
| ev_tdlam_before | 0.785        |
| loss_ent        | 1.3417758    |
| loss_kl         | 0.006564677  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.017923815 |
| loss_vf_loss    | 0.015733894  |
----------------------------------
********** Iteration 83 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00101 |       0.00000 |       0.01675 |       0.00141 |       1.32463
     -0.00401 |       0.00000 |       0.01491 | 

********** Iteration 88 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00029 |       0.00000 |       0.01556 |       0.00146 |       1.13294
     -0.00341 |       0.00000 |       0.01316 |       0.00340 |       1.13352
     -0.00494 |       0.00000 |       0.01230 |       0.00367 |       1.13803
     -0.00800 |       0.00000 |       0.01177 |       0.00571 |       1.13798
     -0.00855 |       0.00000 |       0.01167 |       0.00643 |       1.13596
     -0.00995 |       0.00000 |       0.01144 |       0.00643 |       1.14199
     -0.01149 |       0.00000 |       0.01112 |       0.00621 |       1.13478
     -0.01225 |       0.00000 |       0.01116 |       0.00677 |       1.13708
     -0.01275 |       0.00000 |       0.01086 |       0.00659 |       1.14410
     -0.01380 |       0.00000 |       0.01091 |       0.00676 |       1.14560
Evaluating losses...
     -0.01603 |       0.00000 |       0.01045 |       0.00646 |       

     -0.01088 |       0.00000 |       0.01082 |       0.00575 |       1.16142
     -0.01186 |       0.00000 |       0.01070 |       0.00522 |       1.16239
Evaluating losses...
     -0.01334 |       0.00000 |       0.01040 |       0.00518 |       1.16130
----------------------------------
| EpLenMean       | 728          |
| EpRewMean       | -4.86        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 603          |
| TimeElapsed     | 333          |
| TimestepsSoFar  | 385024       |
| ev_tdlam_before | 0.811        |
| loss_ent        | 1.1613042    |
| loss_kl         | 0.005181462  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013342969 |
| loss_vf_loss    | 0.010404489  |
----------------------------------
********** Iteration 94 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00132 |       0.00000 |       0.01315 |       0.00138 |       1.20784
     -0.00469 |       0.00000 |       0.01148 | 

********** Iteration 99 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00067 |       0.00000 |       0.01260 |       0.00137 |       1.16420
     -0.00384 |       0.00000 |       0.01100 |       0.00401 |       1.16326
     -0.00563 |       0.00000 |       0.01023 |       0.00435 |       1.15462
     -0.00739 |       0.00000 |       0.00981 |       0.00391 |       1.15628
     -0.00867 |       0.00000 |       0.00965 |       0.00462 |       1.15685
     -0.00953 |       0.00000 |       0.00926 |       0.00497 |       1.15928
     -0.00936 |       0.00000 |       0.00916 |       0.00513 |       1.15656
     -0.01106 |       0.00000 |       0.00908 |       0.00552 |       1.15694
     -0.01128 |       0.00000 |       0.00876 |       0.00559 |       1.15373
     -0.01260 |       0.00000 |       0.00869 |       0.00564 |       1.15357
Evaluating losses...
     -0.01445 |       0.00000 |       0.00855 |       0.00640 |       

     -0.01295 |       0.00000 |       0.00972 |       0.00495 |       1.13102
     -0.01447 |       0.00000 |       0.00970 |       0.00552 |       1.13236
Evaluating losses...
     -0.01664 |       0.00000 |       0.00939 |       0.00554 |       1.13222
----------------------------------
| EpLenMean       | 744          |
| EpRewMean       | -4.86        |
| EpThisIter      | 6            |
| EpisodesSoFar   | 663          |
| TimeElapsed     | 369          |
| TimestepsSoFar  | 430080       |
| ev_tdlam_before | 0.851        |
| loss_ent        | 1.1322206    |
| loss_kl         | 0.0055365823 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01663854  |
| loss_vf_loss    | 0.009390486  |
----------------------------------
********** Iteration 105 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00183 |       0.00000 |       0.01394 |       0.00177 |       1.06005
     -0.00555 |       0.00000 |       0.01312 |

********** Iteration 110 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00141 |       0.00000 |       0.02014 |       0.00150 |       1.23356
     -0.00706 |       0.00000 |       0.01783 |       0.00437 |       1.23077
     -0.00976 |       0.00000 |       0.01680 |       0.00593 |       1.23366
     -0.01205 |       0.00000 |       0.01595 |       0.00616 |       1.23164
     -0.01312 |       0.00000 |       0.01542 |       0.00521 |       1.23507
     -0.01502 |       0.00000 |       0.01508 |       0.00633 |       1.23431
     -0.01627 |       0.00000 |       0.01470 |       0.00699 |       1.23124
     -0.01726 |       0.00000 |       0.01433 |       0.00629 |       1.23179
     -0.01798 |       0.00000 |       0.01426 |       0.00692 |       1.23434
     -0.01935 |       0.00000 |       0.01391 |       0.00696 |       1.23071
Evaluating losses...
     -0.02149 |       0.00000 |       0.01334 |       0.00715 |      

     -0.01692 |       0.00000 |       0.00992 |       0.00621 |       1.18142
     -0.01752 |       0.00000 |       0.00974 |       0.00641 |       1.17854
Evaluating losses...
     -0.02035 |       0.00000 |       0.00938 |       0.00695 |       1.18092
----------------------------------
| EpLenMean       | 779          |
| EpRewMean       | -4.81        |
| EpThisIter      | 5            |
| EpisodesSoFar   | 719          |
| TimeElapsed     | 405          |
| TimestepsSoFar  | 475136       |
| ev_tdlam_before | 0.801        |
| loss_ent        | 1.180917     |
| loss_kl         | 0.006948918  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020347524 |
| loss_vf_loss    | 0.009378431  |
----------------------------------
********** Iteration 116 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00119 |       0.00000 |       0.01423 |       0.00106 |       1.16152
     -0.00428 |       0.00000 |       0.01331 |

********** Iteration 121 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00022 |       0.00000 |       0.02196 |       0.00162 |       1.12472
     -0.00534 |       0.00000 |       0.01863 |       0.00383 |       1.12341
     -0.00854 |       0.00000 |       0.01738 |       0.00511 |       1.12018
     -0.01002 |       0.00000 |       0.01670 |       0.00586 |       1.11619
     -0.01187 |       0.00000 |       0.01617 |       0.00526 |       1.11919
     -0.01324 |       0.00000 |       0.01567 |       0.00503 |       1.12810
     -0.01370 |       0.00000 |       0.01553 |       0.00704 |       1.11734
     -0.01518 |       0.00000 |       0.01507 |       0.00622 |       1.12655
     -0.01602 |       0.00000 |       0.01476 |       0.00676 |       1.12474
     -0.01699 |       0.00000 |       0.01465 |       0.00717 |       1.12628
Evaluating losses...
     -0.01961 |       0.00000 |       0.01428 |       0.00667 |      

     -0.01576 |       0.00000 |       0.01221 |       0.00670 |       1.14249
     -0.01659 |       0.00000 |       0.01218 |       0.00697 |       1.14453
     -0.01690 |       0.00000 |       0.01191 |       0.00730 |       1.14386
     -0.01814 |       0.00000 |       0.01173 |       0.00729 |       1.14383
Evaluating losses...
     -0.02072 |       0.00000 |       0.01171 |       0.00765 |       1.14300
----------------------------------
| EpLenMean       | 849          |
| EpRewMean       | -4.78        |
| EpThisIter      | 4            |
| EpisodesSoFar   | 770          |
| TimeElapsed     | 488          |
| TimestepsSoFar  | 520192       |
| ev_tdlam_before | 0.777        |
| loss_ent        | 1.1430038    |
| loss_kl         | 0.007650545  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020718768 |
| loss_vf_loss    | 0.01170661   |
----------------------------------
********** Iteration 127 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |

********** Iteration 132 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00077 |       0.00000 |       0.02100 |       0.00247 |       1.11856
     -0.00652 |       0.00000 |       0.01839 |       0.00547 |       1.10819
     -0.00993 |       0.00000 |       0.01708 |       0.00534 |       1.10761
     -0.01196 |       0.00000 |       0.01609 |       0.00648 |       1.10789
     -0.01280 |       0.00000 |       0.01545 |       0.00511 |       1.11061
     -0.01449 |       0.00000 |       0.01483 |       0.00744 |       1.10271
     -0.01533 |       0.00000 |       0.01429 |       0.00639 |       1.10752
     -0.01737 |       0.00000 |       0.01389 |       0.00674 |       1.11166
     -0.01771 |       0.00000 |       0.01372 |       0.00715 |       1.10522
     -0.01906 |       0.00000 |       0.01341 |       0.00712 |       1.10778
Evaluating losses...
     -0.02156 |       0.00000 |       0.01296 |       0.00730 |      

     -0.01417 |       0.00000 |       0.01159 |       0.00624 |       1.09949
     -0.01508 |       0.00000 |       0.01156 |       0.00587 |       1.09796
Evaluating losses...
     -0.01800 |       0.00000 |       0.01175 |       0.00656 |       1.09645
----------------------------------
| EpLenMean       | 920          |
| EpRewMean       | -4.8         |
| EpThisIter      | 4            |
| EpisodesSoFar   | 817          |
| TimeElapsed     | 525          |
| TimestepsSoFar  | 565248       |
| ev_tdlam_before | 0.767        |
| loss_ent        | 1.0964519    |
| loss_kl         | 0.006561582  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.017999545 |
| loss_vf_loss    | 0.011747978  |
----------------------------------
********** Iteration 138 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00069 |       0.00000 |       0.01416 |       0.00128 |       1.08855
     -0.00392 |       0.00000 |       0.01289 |

********** Iteration 143 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00035 |       0.00000 |       0.01618 |       0.00169 |       1.04544
     -0.00570 |       0.00000 |       0.01371 |       0.00376 |       1.05220
     -0.00832 |       0.00000 |       0.01248 |       0.00363 |       1.03914
     -0.01101 |       0.00000 |       0.01206 |       0.00482 |       1.03115
     -0.01184 |       0.00000 |       0.01153 |       0.00568 |       1.02781
     -0.01374 |       0.00000 |       0.01110 |       0.00576 |       1.03967
     -0.01597 |       0.00000 |       0.01093 |       0.00580 |       1.02787
     -0.01620 |       0.00000 |       0.01059 |       0.00647 |       1.02564
     -0.01714 |       0.00000 |       0.01052 |       0.00638 |       1.02688
     -0.01828 |       0.00000 |       0.01038 |       0.00698 |       1.02762
Evaluating losses...
     -0.02040 |       0.00000 |       0.01013 |       0.00706 |      

     -0.01854 |       0.00000 |       0.01106 |       0.00649 |       1.04718
     -0.01876 |       0.00000 |       0.01090 |       0.00652 |       1.04692
Evaluating losses...
     -0.02063 |       0.00000 |       0.01069 |       0.00651 |       1.04773
----------------------------------
| EpLenMean       | 987          |
| EpRewMean       | -4.79        |
| EpThisIter      | 3            |
| EpisodesSoFar   | 860          |
| TimeElapsed     | 562          |
| TimestepsSoFar  | 610304       |
| ev_tdlam_before | 0.721        |
| loss_ent        | 1.0477346    |
| loss_kl         | 0.006510913  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020628132 |
| loss_vf_loss    | 0.010690616  |
----------------------------------
********** Iteration 149 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00061 |       0.00000 |       0.01481 |       0.00255 |       0.99319
     -0.00408 |       0.00000 |       0.01347 |

********** Iteration 154 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00012 |       0.00000 |       0.01824 |       0.00167 |       1.01792
     -0.00657 |       0.00000 |       0.01622 |       0.00376 |       1.01950
     -0.00970 |       0.00000 |       0.01500 |       0.00439 |       1.02228
     -0.01228 |       0.00000 |       0.01460 |       0.00514 |       1.02450
     -0.01453 |       0.00000 |       0.01394 |       0.00557 |       1.02240
     -0.01566 |       0.00000 |       0.01358 |       0.00566 |       1.02163
     -0.01649 |       0.00000 |       0.01333 |       0.00614 |       1.02506
     -0.01807 |       0.00000 |       0.01294 |       0.00724 |       1.01920
     -0.01888 |       0.00000 |       0.01270 |       0.00702 |       1.01803
     -0.02016 |       0.00000 |       0.01249 |       0.00693 |       1.01668
Evaluating losses...
     -0.02250 |       0.00000 |       0.01224 |       0.00671 |      

     -0.01224 |       0.00000 |       0.00837 |       0.00499 |       0.98813
     -0.01354 |       0.00000 |       0.00826 |       0.00541 |       0.98574
Evaluating losses...
     -0.01593 |       0.00000 |       0.00795 |       0.00506 |       0.98719
----------------------------------
| EpLenMean       | 1.13e+03     |
| EpRewMean       | -4.77        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 893          |
| TimeElapsed     | 596          |
| TimestepsSoFar  | 655360       |
| ev_tdlam_before | 0.653        |
| loss_ent        | 0.98719305   |
| loss_kl         | 0.005058933  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015931543 |
| loss_vf_loss    | 0.0079530645 |
----------------------------------
********** Iteration 160 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00078 |       0.00000 |       0.01398 |       0.00102 |       0.97853
     -0.00539 |       0.00000 |       0.01221 |

********** Iteration 165 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00230 |       0.00000 |       0.01103 |       0.00360 |       1.03378
     -0.00791 |       0.00000 |       0.00984 |       0.00564 |       1.03357
     -0.01079 |       0.00000 |       0.00936 |       0.00773 |       1.03147
     -0.01230 |       0.00000 |       0.00909 |       0.00595 |       1.03293
     -0.01362 |       0.00000 |       0.00881 |       0.00862 |       1.03422
     -0.01451 |       0.00000 |       0.00859 |       0.00817 |       1.03305
     -0.01498 |       0.00000 |       0.00836 |       0.00891 |       1.03159
     -0.01658 |       0.00000 |       0.00821 |       0.00821 |       1.03146
     -0.01603 |       0.00000 |       0.00812 |       0.00954 |       1.03412
     -0.01797 |       0.00000 |       0.00806 |       0.00939 |       1.03258
Evaluating losses...
     -0.01900 |       0.00000 |       0.00804 |       0.00944 |      

     -0.02157 |       0.00000 |       0.01027 |       0.01342 |       0.99681
     -0.02301 |       0.00000 |       0.01028 |       0.01489 |       1.00190
Evaluating losses...
     -0.02522 |       0.00000 |       0.00997 |       0.01289 |       0.99580
----------------------------------
| EpLenMean       | 1.36e+03     |
| EpRewMean       | -4.7         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 915          |
| TimeElapsed     | 630          |
| TimestepsSoFar  | 700416       |
| ev_tdlam_before | 0.461        |
| loss_ent        | 0.99580175   |
| loss_kl         | 0.012887446  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.025219165 |
| loss_vf_loss    | 0.009971423  |
----------------------------------
********** Iteration 171 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00133 |       0.00000 |       0.00997 |       0.00379 |       1.00745
     -0.00695 |       0.00000 |       0.00916 |

********** Iteration 176 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00161 |       0.00000 |       0.00614 |       0.00387 |       0.99044
     -0.00708 |       0.00000 |       0.00509 |       0.00638 |       0.99856
     -0.00914 |       0.00000 |       0.00466 |       0.00874 |       0.99849
     -0.01113 |       0.00000 |       0.00441 |       0.00709 |       0.99372
     -0.01081 |       0.00000 |       0.00425 |       0.00799 |       0.99595
     -0.01212 |       0.00000 |       0.00417 |       0.00815 |       0.99999
     -0.01321 |       0.00000 |       0.00403 |       0.00848 |       1.00261
     -0.01354 |       0.00000 |       0.00393 |       0.00911 |       0.99909
     -0.01403 |       0.00000 |       0.00384 |       0.00827 |       0.99595
     -0.01520 |       0.00000 |       0.00378 |       0.00944 |       0.99842
Evaluating losses...
     -0.01712 |       0.00000 |       0.00360 |       0.00956 |      

     -0.01488 |       0.00000 |       0.01007 |       0.00744 |       1.02061
     -0.01513 |       0.00000 |       0.00994 |       0.00772 |       1.02914
Evaluating losses...
     -0.01830 |       0.00000 |       0.00948 |       0.00779 |       1.02398
----------------------------------
| EpLenMean       | 1.65e+03     |
| EpRewMean       | -4.58        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 933          |
| TimeElapsed     | 663          |
| TimestepsSoFar  | 745472       |
| ev_tdlam_before | 0.562        |
| loss_ent        | 1.0239829    |
| loss_kl         | 0.007792374  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018301588 |
| loss_vf_loss    | 0.009478202  |
----------------------------------
********** Iteration 182 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00040 |       0.00000 |       0.01178 |       0.00386 |       1.00659
     -0.00527 |       0.00000 |       0.00992 |

********** Iteration 187 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00072 |       0.00000 |       0.00888 |       0.00277 |       0.97558
     -0.00473 |       0.00000 |       0.00706 |       0.00479 |       0.97415
     -0.00733 |       0.00000 |       0.00624 |       0.00386 |       0.97134
     -0.00886 |       0.00000 |       0.00592 |       0.00450 |       0.97235
     -0.01072 |       0.00000 |       0.00570 |       0.00455 |       0.97423
     -0.01286 |       0.00000 |       0.00562 |       0.00505 |       0.97087
     -0.01154 |       0.00000 |       0.00535 |       0.00590 |       0.97658
     -0.01380 |       0.00000 |       0.00524 |       0.00773 |       0.96116
     -0.01395 |       0.00000 |       0.00508 |       0.00678 |       0.96991
     -0.01464 |       0.00000 |       0.00499 |       0.00635 |       0.97134
Evaluating losses...
     -0.01638 |       0.00000 |       0.00473 |       0.00736 |      

     -0.01555 |       0.00000 |       0.00430 |       0.00736 |       1.06009
     -0.01570 |       0.00000 |       0.00426 |       0.00776 |       1.05797
Evaluating losses...
     -0.01789 |       0.00000 |       0.00402 |       0.00663 |       1.05422
----------------------------------
| EpLenMean       | 1.92e+03     |
| EpRewMean       | -4.32        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 948          |
| TimeElapsed     | 800          |
| TimestepsSoFar  | 790528       |
| ev_tdlam_before | 0.239        |
| loss_ent        | 1.0542243    |
| loss_kl         | 0.00662632   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.017893102 |
| loss_vf_loss    | 0.0040226164 |
----------------------------------
********** Iteration 193 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00026 |       0.00000 |       0.00702 |       0.00196 |       1.01335
     -0.00526 |       0.00000 |       0.00554 |

********** Iteration 198 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00406 |       0.00000 |       0.00629 |       0.00423 |       1.03192
     -0.00526 |       0.00000 |       0.00550 |       0.00462 |       1.02934
     -0.00677 |       0.00000 |       0.00520 |       0.00645 |       1.02420
     -0.01020 |       0.00000 |       0.00486 |       0.00568 |       1.02924
     -0.01085 |       0.00000 |       0.00461 |       0.00619 |       1.03290
     -0.01173 |       0.00000 |       0.00445 |       0.00786 |       1.02838
     -0.01379 |       0.00000 |       0.00436 |       0.00821 |       1.03561
     -0.01432 |       0.00000 |       0.00419 |       0.00824 |       1.03780
     -0.01501 |       0.00000 |       0.00414 |       0.00820 |       1.03068
     -0.01591 |       0.00000 |       0.00402 |       0.00924 |       1.03908
Evaluating losses...
     -0.01668 |       0.00000 |       0.00386 |       0.00902 |      

     -0.01747 |       0.00000 |       0.00473 |       0.01040 |       1.05991
     -0.01841 |       0.00000 |       0.00458 |       0.01008 |       1.05919
Evaluating losses...
     -0.02030 |       0.00000 |       0.00450 |       0.00981 |       1.05601
----------------------------------
| EpLenMean       | 2.21e+03     |
| EpRewMean       | -4.03        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 964          |
| TimeElapsed     | 834          |
| TimestepsSoFar  | 835584       |
| ev_tdlam_before | 0.313        |
| loss_ent        | 1.0560142    |
| loss_kl         | 0.009810595  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020298617 |
| loss_vf_loss    | 0.0044990587 |
----------------------------------
********** Iteration 204 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00186 |       0.00000 |       0.00580 |       0.00164 |       1.04839
     -0.00493 |       0.00000 |       0.00474 |

********** Iteration 209 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00589 |       0.00000 |       0.00465 |       0.00302 |       1.05675
     -0.00173 |       0.00000 |       0.00402 |       0.00437 |       1.06081
     -0.00536 |       0.00000 |       0.00383 |       0.00566 |       1.05652
     -0.00637 |       0.00000 |       0.00361 |       0.00521 |       1.06859
     -0.00887 |       0.00000 |       0.00347 |       0.00443 |       1.06173
     -0.00890 |       0.00000 |       0.00336 |       0.00789 |       1.06181
     -0.01136 |       0.00000 |       0.00331 |       0.00524 |       1.06702
     -0.01256 |       0.00000 |       0.00329 |       0.00574 |       1.07004
     -0.01288 |       0.00000 |       0.00321 |       0.00693 |       1.06484
     -0.01336 |       0.00000 |       0.00322 |       0.00804 |       1.07216
Evaluating losses...
     -0.01440 |       0.00000 |       0.00302 |       0.00823 |      

     -0.01558 |       0.00000 |       0.00341 |       0.01075 |       1.09880
     -0.01662 |       0.00000 |       0.00332 |       0.00956 |       1.09654
Evaluating losses...
     -0.01826 |       0.00000 |       0.00316 |       0.01046 |       1.09703
----------------------------------
| EpLenMean       | 2.46e+03     |
| EpRewMean       | -3.72        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 979          |
| TimeElapsed     | 870          |
| TimestepsSoFar  | 880640       |
| ev_tdlam_before | 0.516        |
| loss_ent        | 1.0970298    |
| loss_kl         | 0.01046261   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018256944 |
| loss_vf_loss    | 0.0031551968 |
----------------------------------
********** Iteration 215 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -3.21e-05 |       0.00000 |       0.00911 |       0.00375 |       1.02231
     -0.00490 |       0.00000 |       0.00727 |

********** Iteration 220 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00084 |       0.00000 |       0.00944 |       0.00318 |       1.03089
     -0.00459 |       0.00000 |       0.00762 |       0.00644 |       1.02533
     -0.00786 |       0.00000 |       0.00702 |       0.00752 |       1.02841
     -0.00949 |       0.00000 |       0.00671 |       0.00611 |       1.02528
     -0.01072 |       0.00000 |       0.00635 |       0.00706 |       1.02696
     -0.01217 |       0.00000 |       0.00617 |       0.00663 |       1.02813
     -0.01333 |       0.00000 |       0.00602 |       0.00673 |       1.02975
     -0.01409 |       0.00000 |       0.00593 |       0.00881 |       1.02526
     -0.01438 |       0.00000 |       0.00575 |       0.00762 |       1.03259
     -0.01538 |       0.00000 |       0.00565 |       0.00718 |       1.03153
Evaluating losses...
     -0.01737 |       0.00000 |       0.00536 |       0.00739 |      

     -0.01681 |       0.00000 |       0.00297 |       0.01115 |       1.04730
     -0.01761 |       0.00000 |       0.00295 |       0.01308 |       1.04153
Evaluating losses...
     -0.01861 |       0.00000 |       0.00278 |       0.01409 |       1.03919
----------------------------------
| EpLenMean       | 2.67e+03     |
| EpRewMean       | -3.38        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 994          |
| TimeElapsed     | 905          |
| TimestepsSoFar  | 925696       |
| ev_tdlam_before | 0.248        |
| loss_ent        | 1.0391898    |
| loss_kl         | 0.014094385  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018605672 |
| loss_vf_loss    | 0.0027759126 |
----------------------------------
********** Iteration 226 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00048 |       0.00000 |       0.00403 |       0.00599 |       1.07664
     -0.00436 |       0.00000 |       0.00301 |

********** Iteration 231 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00304 |       0.00000 |       0.00429 |       0.00686 |       1.10262
     -0.00202 |       0.00000 |       0.00371 |       0.00726 |       1.09672
     -0.00638 |       0.00000 |       0.00355 |       0.00646 |       1.09850
     -0.00807 |       0.00000 |       0.00341 |       0.00729 |       1.09697
     -0.00931 |       0.00000 |       0.00325 |       0.00934 |       1.10029
     -0.01110 |       0.00000 |       0.00322 |       0.00861 |       1.09564
     -0.01192 |       0.00000 |       0.00312 |       0.00912 |       1.09876
     -0.01300 |       0.00000 |       0.00305 |       0.00873 |       1.10015
     -0.01403 |       0.00000 |       0.00307 |       0.00865 |       1.10264
     -0.01457 |       0.00000 |       0.00294 |       0.00913 |       1.10034
Evaluating losses...
     -0.01684 |       0.00000 |       0.00283 |       0.00793 |      

     -0.01462 |       0.00000 |       0.00345 |       0.00739 |       1.14911
     -0.01431 |       0.00000 |       0.00340 |       0.00908 |       1.14573
Evaluating losses...
     -0.01797 |       0.00000 |       0.00319 |       0.00814 |       1.15294
----------------------------------
| EpLenMean       | 2.83e+03     |
| EpRewMean       | -2.89        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1009         |
| TimeElapsed     | 940          |
| TimestepsSoFar  | 970752       |
| ev_tdlam_before | 0.378        |
| loss_ent        | 1.1529399    |
| loss_kl         | 0.008143317  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.017968439 |
| loss_vf_loss    | 0.0031880115 |
----------------------------------
********** Iteration 237 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00112 |       0.00000 |       0.00768 |       0.00468 |       1.14727
     -0.00486 |       0.00000 |       0.00578 |

********** Iteration 242 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00185 |       0.00000 |       0.00519 |       0.00430 |       1.04829
     -0.00300 |       0.00000 |       0.00389 |       0.00582 |       1.06499
     -0.00623 |       0.00000 |       0.00330 |       0.00538 |       1.07208
     -0.00865 |       0.00000 |       0.00300 |       0.00587 |       1.07358
     -0.00973 |       0.00000 |       0.00276 |       0.00589 |       1.07492
     -0.01115 |       0.00000 |       0.00260 |       0.00727 |       1.06989
     -0.01127 |       0.00000 |       0.00247 |       0.00759 |       1.06661
     -0.01091 |       0.00000 |       0.00236 |       0.00761 |       1.07805
     -0.01273 |       0.00000 |       0.00228 |       0.00753 |       1.07321
     -0.01307 |       0.00000 |       0.00221 |       0.00772 |       1.07518
Evaluating losses...
     -0.01420 |       0.00000 |       0.00224 |       0.00934 |      

     -0.01085 |       0.00000 |       0.00352 |       0.00891 |       1.08980
     -0.01095 |       0.00000 |       0.00341 |       0.00911 |       1.09127
     -0.01144 |       0.00000 |       0.00331 |       0.00877 |       1.09324
     -0.01270 |       0.00000 |       0.00322 |       0.00764 |       1.08912
Evaluating losses...
     -0.01464 |       0.00000 |       0.00302 |       0.00752 |       1.08931
----------------------------------
| EpLenMean       | 2.94e+03     |
| EpRewMean       | -2.47        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1024         |
| TimeElapsed     | 1.13e+03     |
| TimestepsSoFar  | 1015808      |
| ev_tdlam_before | 0.48         |
| loss_ent        | 1.0893056    |
| loss_kl         | 0.0075240014 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014643367 |
| loss_vf_loss    | 0.003016755  |
----------------------------------
********** Iteration 248 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |

********** Iteration 253 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00161 |       0.00000 |       0.00253 |       0.00435 |       1.08402
     -0.00318 |       0.00000 |       0.00189 |       0.00721 |       1.08012
     -0.00647 |       0.00000 |       0.00163 |       0.00479 |       1.07792
     -0.00951 |       0.00000 |       0.00152 |       0.00437 |       1.07800
     -0.01101 |       0.00000 |       0.00146 |       0.00549 |       1.07811
     -0.01255 |       0.00000 |       0.00138 |       0.00558 |       1.07836
     -0.01438 |       0.00000 |       0.00135 |       0.00582 |       1.08350
     -0.01492 |       0.00000 |       0.00131 |       0.00625 |       1.07842
     -0.01634 |       0.00000 |       0.00128 |       0.00642 |       1.08325
     -0.01690 |       0.00000 |       0.00126 |       0.00687 |       1.08038
Evaluating losses...
     -0.01949 |       0.00000 |       0.00119 |       0.00642 |      

     -0.01744 |       0.00000 |       0.00627 |       0.01017 |       1.12328
     -0.01731 |       0.00000 |       0.00618 |       0.01186 |       1.11895
Evaluating losses...
     -0.02084 |       0.00000 |       0.00594 |       0.01113 |       1.11660
----------------------------------
| EpLenMean       | 2.99e+03     |
| EpRewMean       | -2.08        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1039         |
| TimeElapsed     | 1.17e+03     |
| TimestepsSoFar  | 1060864      |
| ev_tdlam_before | 0.477        |
| loss_ent        | 1.1166049    |
| loss_kl         | 0.011127495  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020843007 |
| loss_vf_loss    | 0.005941735  |
----------------------------------
********** Iteration 259 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00320 |       0.00000 |       0.00112 |       0.00266 |       1.11265
     -0.00383 |       0.00000 |       0.00057 |

********** Iteration 264 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00056 |       0.00000 |       0.00460 |       0.00408 |       1.10532
     -0.00533 |       0.00000 |       0.00415 |       0.00552 |       1.10235
     -0.00545 |       0.00000 |       0.00391 |       0.00914 |       1.09776
     -0.01014 |       0.00000 |       0.00373 |       0.00639 |       1.09802
     -0.01063 |       0.00000 |       0.00370 |       0.00877 |       1.08887
     -0.01108 |       0.00000 |       0.00363 |       0.00738 |       1.09393
     -0.01340 |       0.00000 |       0.00355 |       0.00832 |       1.09061
     -0.01370 |       0.00000 |       0.00338 |       0.00778 |       1.09549
     -0.01513 |       0.00000 |       0.00338 |       0.00857 |       1.09647
     -0.01514 |       0.00000 |       0.00324 |       0.00947 |       1.09570
Evaluating losses...
     -0.01686 |       0.00000 |       0.00311 |       0.01133 |      

     -0.01380 |       0.00000 |       0.00190 |       0.00865 |       1.10700
     -0.01476 |       0.00000 |       0.00185 |       0.00778 |       1.11467
Evaluating losses...
     -0.01648 |       0.00000 |       0.00175 |       0.00874 |       1.11255
----------------------------------
| EpLenMean       | 2.99e+03     |
| EpRewMean       | -1.89        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1054         |
| TimeElapsed     | 1.2e+03      |
| TimestepsSoFar  | 1105920      |
| ev_tdlam_before | 0.555        |
| loss_ent        | 1.112553     |
| loss_kl         | 0.008738206  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016479041 |
| loss_vf_loss    | 0.0017528324 |
----------------------------------
********** Iteration 270 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00199 |       0.00000 |       0.00621 |       0.00415 |       1.10111
     -0.00334 |       0.00000 |       0.00504 |

********** Iteration 275 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00077 |       0.00000 |       0.00950 |       0.00483 |       1.11341
     -0.00756 |       0.00000 |       0.00749 |       0.00707 |       1.12528
     -0.01153 |       0.00000 |       0.00657 |       0.00603 |       1.12196
     -0.01301 |       0.00000 |       0.00621 |       0.00787 |       1.11989
     -0.01436 |       0.00000 |       0.00597 |       0.00881 |       1.12278
     -0.01568 |       0.00000 |       0.00561 |       0.00855 |       1.12311
     -0.01684 |       0.00000 |       0.00541 |       0.00889 |       1.11925
     -0.01723 |       0.00000 |       0.00537 |       0.00931 |       1.11862
     -0.01823 |       0.00000 |       0.00530 |       0.00920 |       1.11873
     -0.01858 |       0.00000 |       0.00505 |       0.01028 |       1.11807
Evaluating losses...
     -0.02024 |       0.00000 |       0.00481 |       0.00981 |      

     -0.01923 |       0.00000 |       0.00546 |       0.01022 |       1.08748
     -0.02009 |       0.00000 |       0.00524 |       0.01013 |       1.07993
Evaluating losses...
     -0.02299 |       0.00000 |       0.00504 |       0.00862 |       1.08344
----------------------------------
| EpLenMean       | 3e+03        |
| EpRewMean       | -1.63        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1069         |
| TimeElapsed     | 1.24e+03     |
| TimestepsSoFar  | 1150976      |
| ev_tdlam_before | 0.503        |
| loss_ent        | 1.0834439    |
| loss_kl         | 0.008624235  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022987599 |
| loss_vf_loss    | 0.005042369  |
----------------------------------
********** Iteration 281 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00285 |       0.00000 |       0.00981 |       0.00781 |       1.05359
     -0.00693 |       0.00000 |       0.00866 |

********** Iteration 286 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00097 |       0.00000 |       0.00645 |       0.00845 |       1.11883
     -0.00632 |       0.00000 |       0.00507 |       0.01502 |       1.12064
     -0.00924 |       0.00000 |       0.00438 |       0.00919 |       1.12355
     -0.01211 |       0.00000 |       0.00404 |       0.01129 |       1.12054
     -0.01371 |       0.00000 |       0.00392 |       0.00962 |       1.11566
     -0.01350 |       0.00000 |       0.00363 |       0.02116 |       1.11452
     -0.01570 |       0.00000 |       0.00336 |       0.01278 |       1.11999
     -0.01678 |       0.00000 |       0.00331 |       0.01225 |       1.11556
     -0.01820 |       0.00000 |       0.00318 |       0.01231 |       1.11752
     -0.01843 |       0.00000 |       0.00308 |       0.01166 |       1.11865
Evaluating losses...
     -0.01904 |       0.00000 |       0.00300 |       0.01830 |      

     -0.01963 |       0.00000 |       0.00406 |       0.00939 |       1.12441
     -0.02032 |       0.00000 |       0.00408 |       0.01030 |       1.12330
Evaluating losses...
     -0.02150 |       0.00000 |       0.00374 |       0.01160 |       1.12547
----------------------------------
| EpLenMean       | 3e+03        |
| EpRewMean       | -1.42        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1084         |
| TimeElapsed     | 1.27e+03     |
| TimestepsSoFar  | 1196032      |
| ev_tdlam_before | 0.467        |
| loss_ent        | 1.1254742    |
| loss_kl         | 0.011601186  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.02149997  |
| loss_vf_loss    | 0.0037442292 |
----------------------------------
********** Iteration 292 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00055 |       0.00000 |       0.00194 |       0.00312 |       1.12290
     -0.00672 |       0.00000 |       0.00112 |

********** Iteration 297 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00263 |       0.00000 |       0.00251 |       0.00462 |       1.08757
     -0.00648 |       0.00000 |       0.00169 |       0.00529 |       1.08949
     -0.00850 |       0.00000 |       0.00146 |       0.00634 |       1.09356
     -0.00938 |       0.00000 |       0.00134 |       0.00578 |       1.09393
     -0.01054 |       0.00000 |       0.00125 |       0.00635 |       1.09344
     -0.01116 |       0.00000 |       0.00121 |       0.00727 |       1.09528
     -0.01218 |       0.00000 |       0.00116 |       0.00855 |       1.09411
     -0.01353 |       0.00000 |       0.00112 |       0.00769 |       1.09186
     -0.01453 |       0.00000 |       0.00109 |       0.00823 |       1.09213
     -0.01458 |       0.00000 |       0.00104 |       0.00955 |       1.09334
Evaluating losses...
     -0.01669 |       0.00000 |       0.00095 |       0.00900 |      

     -0.01847 |       0.00000 |       0.00265 |       0.00815 |       1.12430
     -0.01966 |       0.00000 |       0.00260 |       0.00804 |       1.12319
Evaluating losses...
     -0.02193 |       0.00000 |       0.00248 |       0.00889 |       1.12385
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -1.3         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1099         |
| TimeElapsed     | 1.3e+03      |
| TimestepsSoFar  | 1241088      |
| ev_tdlam_before | 0.33         |
| loss_ent        | 1.1238542    |
| loss_kl         | 0.00889264   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021929933 |
| loss_vf_loss    | 0.0024787635 |
----------------------------------
********** Iteration 303 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00020 |       0.00000 |       0.00501 |       0.00327 |       1.10683
     -0.00699 |       0.00000 |       0.00353 |

********** Iteration 308 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00365 |       0.00000 |       0.00684 |       0.00476 |       1.06791
     -0.00494 |       0.00000 |       0.00540 |       0.00500 |       1.06337
     -0.00660 |       0.00000 |       0.00501 |       0.00660 |       1.07457
     -0.00859 |       0.00000 |       0.00480 |       0.00625 |       1.06637
     -0.01079 |       0.00000 |       0.00459 |       0.00721 |       1.07084
     -0.01303 |       0.00000 |       0.00442 |       0.00612 |       1.07199
     -0.01336 |       0.00000 |       0.00423 |       0.00773 |       1.07242
     -0.01439 |       0.00000 |       0.00417 |       0.00797 |       1.07362
     -0.01504 |       0.00000 |       0.00408 |       0.00769 |       1.07400
     -0.01646 |       0.00000 |       0.00407 |       0.00840 |       1.07439
Evaluating losses...
     -0.01830 |       0.00000 |       0.00391 |       0.00992 |      

     -0.01637 |       0.00000 |       0.00765 |       0.00716 |       1.01043
     -0.01709 |       0.00000 |       0.00743 |       0.00742 |       1.01184
     -0.01776 |       0.00000 |       0.00729 |       0.00725 |       1.01384
Evaluating losses...
     -0.01935 |       0.00000 |       0.00710 |       0.00925 |       1.00947
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -1.35        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1114         |
| TimeElapsed     | 1.45e+03     |
| TimestepsSoFar  | 1286144      |
| ev_tdlam_before | 0.495        |
| loss_ent        | 1.0094746    |
| loss_kl         | 0.009250305  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019349359 |
| loss_vf_loss    | 0.007097397  |
----------------------------------
********** Iteration 314 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00027 |       0.00000 |       0.00143 |

********** Iteration 319 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00261 |       0.00000 |       0.00669 |       0.00326 |       1.04559
     -0.00449 |       0.00000 |       0.00451 |       0.00459 |       1.04767
     -0.00792 |       0.00000 |       0.00382 |       0.00563 |       1.05054
     -0.00914 |       0.00000 |       0.00331 |       0.00637 |       1.04913
     -0.00938 |       0.00000 |       0.00313 |       0.00755 |       1.05332
     -0.00966 |       0.00000 |       0.00314 |       0.00724 |       1.05703
     -0.01198 |       0.00000 |       0.00296 |       0.00836 |       1.05667
     -0.01331 |       0.00000 |       0.00288 |       0.00923 |       1.05578
     -0.01354 |       0.00000 |       0.00285 |       0.00962 |       1.05690
     -0.01544 |       0.00000 |       0.00278 |       0.00926 |       1.05672
Evaluating losses...
     -0.01684 |       0.00000 |       0.00257 |       0.00980 |      

     -0.01435 |       0.00000 |       0.00099 |       0.00686 |       1.05544
     -0.01529 |       0.00000 |       0.00096 |       0.00665 |       1.05713
     -0.01614 |       0.00000 |       0.00095 |       0.00728 |       1.05556
Evaluating losses...
     -0.01712 |       0.00000 |       0.00086 |       0.00696 |       1.05558
-----------------------------------
| EpLenMean       | 3e+03         |
| EpRewMean       | -1.3          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1129          |
| TimeElapsed     | 1.49e+03      |
| TimestepsSoFar  | 1331200       |
| ev_tdlam_before | 0.209         |
| loss_ent        | 1.0555837     |
| loss_kl         | 0.0069607086  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.017121391  |
| loss_vf_loss    | 0.00085871236 |
-----------------------------------
********** Iteration 325 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00256 |       0.00000 |  

********** Iteration 330 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00072 |       0.00000 |       0.00053 |       0.00273 |       1.04761
     -0.00535 |       0.00000 |       0.00033 |       0.00524 |       1.04228
     -0.00854 |       0.00000 |       0.00028 |       0.00459 |       1.04523
     -0.00932 |       0.00000 |       0.00025 |       0.00563 |       1.04692
     -0.01129 |       0.00000 |       0.00024 |       0.00513 |       1.04749
     -0.01281 |       0.00000 |       0.00022 |       0.00573 |       1.04452
     -0.01404 |       0.00000 |       0.00021 |       0.00590 |       1.04796
     -0.01519 |       0.00000 |       0.00020 |       0.00703 |       1.04743
     -0.01586 |       0.00000 |       0.00020 |       0.00762 |       1.04382
     -0.01639 |       0.00000 |       0.00019 |       0.00780 |       1.04722
Evaluating losses...
     -0.01971 |       0.00000 |       0.00020 |       0.00791 |      

     -0.01600 |       0.00000 |       0.00270 |       0.01264 |       1.07631
     -0.01584 |       0.00000 |       0.00258 |       0.01498 |       1.07614
     -0.01677 |       0.00000 |       0.00260 |       0.01329 |       1.07644
Evaluating losses...
     -0.01818 |       0.00000 |       0.00245 |       0.01376 |       1.07383
----------------------------------
| EpLenMean       | 3e+03        |
| EpRewMean       | -1.31        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1144         |
| TimeElapsed     | 1.52e+03     |
| TimestepsSoFar  | 1376256      |
| ev_tdlam_before | 0.388        |
| loss_ent        | 1.0738299    |
| loss_kl         | 0.013755021  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018176557 |
| loss_vf_loss    | 0.0024470997 |
----------------------------------
********** Iteration 336 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.01213 |       0.00000 |       0.00051 |

********** Iteration 341 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00107 |       0.00000 |       0.00425 |       0.00315 |       1.01894
     -0.00501 |       0.00000 |       0.00377 |       0.00777 |       1.01801
     -0.00719 |       0.00000 |       0.00357 |       0.00798 |       1.01349
     -0.00794 |       0.00000 |       0.00351 |       0.00989 |       1.01622
     -0.01144 |       0.00000 |       0.00336 |       0.00890 |       1.01425
     -0.01355 |       0.00000 |       0.00316 |       0.01078 |       1.01863
     -0.01305 |       0.00000 |       0.00309 |       0.01130 |       1.01820
     -0.01596 |       0.00000 |       0.00304 |       0.01008 |       1.01479
     -0.01658 |       0.00000 |       0.00298 |       0.00985 |       1.01949
     -0.01647 |       0.00000 |       0.00293 |       0.01002 |       1.02011
Evaluating losses...
     -0.01858 |       0.00000 |       0.00275 |       0.01281 |      

     -0.01279 |       0.00000 |       0.00379 |       0.00570 |       0.96052
     -0.01392 |       0.00000 |       0.00359 |       0.00634 |       0.95827
     -0.01417 |       0.00000 |       0.00365 |       0.00625 |       0.95766
Evaluating losses...
     -0.01594 |       0.00000 |       0.00334 |       0.00665 |       0.95832
----------------------------------
| EpLenMean       | 3e+03        |
| EpRewMean       | -1.26        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1159         |
| TimeElapsed     | 1.56e+03     |
| TimestepsSoFar  | 1421312      |
| ev_tdlam_before | 0.556        |
| loss_ent        | 0.95832473   |
| loss_kl         | 0.006653397  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015939316 |
| loss_vf_loss    | 0.0033372447 |
----------------------------------
********** Iteration 347 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00048 |       0.00000 |       0.00107 |

********** Iteration 352 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00042 |       0.00000 |       0.00680 |       0.00237 |       0.94996
     -0.00421 |       0.00000 |       0.00455 |       0.00313 |       0.94727
     -0.00609 |       0.00000 |       0.00402 |       0.00650 |       0.94413
     -0.00861 |       0.00000 |       0.00369 |       0.00484 |       0.94830
     -0.00927 |       0.00000 |       0.00353 |       0.00478 |       0.94903
     -0.01010 |       0.00000 |       0.00331 |       0.00535 |       0.94695
     -0.01045 |       0.00000 |       0.00318 |       0.00608 |       0.95194
     -0.01169 |       0.00000 |       0.00305 |       0.00662 |       0.94861
     -0.01243 |       0.00000 |       0.00298 |       0.00785 |       0.94877
     -0.01262 |       0.00000 |       0.00284 |       0.00737 |       0.95068
Evaluating losses...
     -0.01423 |       0.00000 |       0.00271 |       0.00733 |      

     -0.01686 |       0.00000 |       0.00313 |       0.00825 |       0.98123
     -0.01767 |       0.00000 |       0.00308 |       0.00858 |       0.98439
     -0.01765 |       0.00000 |       0.00304 |       0.00816 |       0.98375
Evaluating losses...
     -0.01907 |       0.00000 |       0.00284 |       0.00725 |       0.97897
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -1.1         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1174         |
| TimeElapsed     | 1.59e+03     |
| TimestepsSoFar  | 1466368      |
| ev_tdlam_before | 0.303        |
| loss_ent        | 0.97897273   |
| loss_kl         | 0.0072484734 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019071896 |
| loss_vf_loss    | 0.0028428528 |
----------------------------------
********** Iteration 358 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00163 |       0.00000 |       0.00249 |

********** Iteration 363 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00104 |       0.00000 |       0.00407 |       0.00418 |       0.91852
     -0.00481 |       0.00000 |       0.00308 |       0.00419 |       0.92280
     -0.00941 |       0.00000 |       0.00268 |       0.00603 |       0.92622
     -0.01180 |       0.00000 |       0.00245 |       0.00606 |       0.92168
     -0.01384 |       0.00000 |       0.00226 |       0.00734 |       0.92254
     -0.01351 |       0.00000 |       0.00218 |       0.00723 |       0.92172
     -0.01460 |       0.00000 |       0.00209 |       0.00868 |       0.91773
     -0.01521 |       0.00000 |       0.00204 |       0.00932 |       0.92031
     -0.01664 |       0.00000 |       0.00204 |       0.00848 |       0.91693
     -0.01792 |       0.00000 |       0.00198 |       0.00884 |       0.91417
Evaluating losses...
     -0.01862 |       0.00000 |       0.00188 |       0.00940 |      

     -0.01408 |       0.00000 |       0.00237 |       0.00698 |       0.93636
     -0.01504 |       0.00000 |       0.00232 |       0.00742 |       0.93901
     -0.01617 |       0.00000 |       0.00228 |       0.00809 |       0.93974
     -0.01657 |       0.00000 |       0.00227 |       0.00773 |       0.94163
Evaluating losses...
     -0.01866 |       0.00000 |       0.00215 |       0.00779 |       0.94119
----------------------------------
| EpLenMean       | 3.03e+03     |
| EpRewMean       | -1.09        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1188         |
| TimeElapsed     | 1.76e+03     |
| TimestepsSoFar  | 1511424      |
| ev_tdlam_before | 0.448        |
| loss_ent        | 0.94118583   |
| loss_kl         | 0.0077887313 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018657824 |
| loss_vf_loss    | 0.002147747  |
----------------------------------
********** Iteration 369 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |

********** Iteration 374 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00134 |       0.00000 |       0.00228 |       0.00185 |       0.98873
     -0.00451 |       0.00000 |       0.00187 |       0.00586 |       0.98756
     -0.00689 |       0.00000 |       0.00176 |       0.00552 |       0.98853
     -0.00962 |       0.00000 |       0.00168 |       0.00642 |       0.99144
     -0.01172 |       0.00000 |       0.00163 |       0.00725 |       0.99163
     -0.01344 |       0.00000 |       0.00165 |       0.00702 |       0.99294
     -0.01489 |       0.00000 |       0.00164 |       0.00750 |       0.99050
     -0.01420 |       0.00000 |       0.00155 |       0.00994 |       0.99701
     -0.01633 |       0.00000 |       0.00151 |       0.00926 |       0.99423
     -0.01708 |       0.00000 |       0.00150 |       0.01029 |       0.99553
Evaluating losses...
     -0.01784 |       0.00000 |       0.00144 |       0.01046 |      

     -0.00803 |       0.00000 |       0.00149 |       0.00722 |       0.93826
     -0.00805 |       0.00000 |       0.00143 |       0.00657 |       0.94035
Evaluating losses...
     -0.00979 |       0.00000 |       0.00133 |       0.00724 |       0.93558
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -1.06        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1203         |
| TimeElapsed     | 1.8e+03      |
| TimestepsSoFar  | 1556480      |
| ev_tdlam_before | 0.226        |
| loss_ent        | 0.93557835   |
| loss_kl         | 0.007236067  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009787291 |
| loss_vf_loss    | 0.0013279341 |
----------------------------------
********** Iteration 380 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00044 |       0.00000 |       0.00297 |       0.00710 |       0.95403
     -0.00190 |       0.00000 |       0.00275 |

********** Iteration 385 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00090 |       0.00000 |       0.00546 |       0.00819 |       0.98844
     -0.00770 |       0.00000 |       0.00481 |       0.00590 |       0.99356
     -0.00917 |       0.00000 |       0.00449 |       0.00683 |       0.98939
     -0.01041 |       0.00000 |       0.00435 |       0.00651 |       0.99165
     -0.01099 |       0.00000 |       0.00422 |       0.00733 |       0.98700
     -0.01236 |       0.00000 |       0.00412 |       0.00763 |       0.98974
     -0.01362 |       0.00000 |       0.00400 |       0.00809 |       0.98665
     -0.01457 |       0.00000 |       0.00398 |       0.00856 |       0.98741
     -0.01634 |       0.00000 |       0.00385 |       0.00801 |       0.98557
     -0.01658 |       0.00000 |       0.00381 |       0.00769 |       0.98732
Evaluating losses...
     -0.01835 |       0.00000 |       0.00360 |       0.00869 |      

     -0.01072 |       0.00000 |       0.00185 |       0.00768 |       0.98512
     -0.01119 |       0.00000 |       0.00181 |       0.00729 |       0.98562
     -0.01155 |       0.00000 |       0.00182 |       0.00794 |       0.98394
Evaluating losses...
     -0.01364 |       0.00000 |       0.00169 |       0.00796 |       0.98340
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.9          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1218          |
| TimeElapsed     | 1.84e+03      |
| TimestepsSoFar  | 1601536       |
| ev_tdlam_before | 0.445         |
| loss_ent        | 0.98339754    |
| loss_kl         | 0.007962148   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0136423325 |
| loss_vf_loss    | 0.0016915571  |
-----------------------------------
********** Iteration 391 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00186 |       0.00000 |  

********** Iteration 396 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00428 |       0.00000 |       0.00494 |       0.00689 |       0.95806
     -0.00385 |       0.00000 |       0.00398 |       0.00464 |       0.96583
     -0.00645 |       0.00000 |       0.00357 |       0.00537 |       0.96408
     -0.00846 |       0.00000 |       0.00320 |       0.00547 |       0.96961
     -0.01044 |       0.00000 |       0.00301 |       0.00714 |       0.96388
     -0.01106 |       0.00000 |       0.00282 |       0.00680 |       0.96883
     -0.01236 |       0.00000 |       0.00278 |       0.00717 |       0.96910
     -0.01347 |       0.00000 |       0.00265 |       0.00784 |       0.96509
     -0.01378 |       0.00000 |       0.00258 |       0.00941 |       0.97216
     -0.01431 |       0.00000 |       0.00253 |       0.00946 |       0.97328
Evaluating losses...
     -0.01673 |       0.00000 |       0.00248 |       0.01022 |      

     -0.01611 |       0.00000 |       0.00306 |       0.00924 |       0.95612
     -0.01706 |       0.00000 |       0.00306 |       0.00901 |       0.95839
Evaluating losses...
     -0.01860 |       0.00000 |       0.00289 |       0.01039 |       0.95587
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.91        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1234         |
| TimeElapsed     | 1.87e+03     |
| TimestepsSoFar  | 1646592      |
| ev_tdlam_before | 0.433        |
| loss_ent        | 0.9558702    |
| loss_kl         | 0.010388265  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018602675 |
| loss_vf_loss    | 0.002891854  |
----------------------------------
********** Iteration 402 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00160 |       0.00000 |       0.00391 |       0.00540 |       0.94988
     -0.00156 |       0.00000 |       0.00307 |

********** Iteration 407 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00044 |       0.00000 |       0.00471 |       0.00537 |       0.99186
     -0.00715 |       0.00000 |       0.00395 |       0.00476 |       0.98935
     -0.01017 |       0.00000 |       0.00360 |       0.00579 |       0.99342
     -0.01206 |       0.00000 |       0.00353 |       0.00497 |       0.99093
     -0.01249 |       0.00000 |       0.00335 |       0.00603 |       0.99294
     -0.01426 |       0.00000 |       0.00320 |       0.00553 |       0.99249
     -0.01550 |       0.00000 |       0.00318 |       0.00702 |       0.99222
     -0.01565 |       0.00000 |       0.00310 |       0.00766 |       0.99106
     -0.01711 |       0.00000 |       0.00300 |       0.00735 |       0.99315
     -0.01730 |       0.00000 |       0.00300 |       0.00663 |       0.99129
Evaluating losses...
     -0.01864 |       0.00000 |       0.00284 |       0.00783 |      

     -0.01485 |       0.00000 |       0.00143 |       0.00768 |       1.02417
     -0.01598 |       0.00000 |       0.00138 |       0.00766 |       1.02449
Evaluating losses...
     -0.01695 |       0.00000 |       0.00129 |       0.00748 |       1.02833
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.93        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1249         |
| TimeElapsed     | 1.91e+03     |
| TimestepsSoFar  | 1691648      |
| ev_tdlam_before | 0.238        |
| loss_ent        | 1.0283312    |
| loss_kl         | 0.0074755186 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016949706 |
| loss_vf_loss    | 0.001285525  |
----------------------------------
********** Iteration 413 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00079 |       0.00000 |       0.00524 |       0.00434 |       1.03064
     -0.00644 |       0.00000 |       0.00389 |

********** Iteration 418 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00049 |       0.00000 |       0.00482 |       0.00869 |       1.04809
     -0.00587 |       0.00000 |       0.00336 |       0.00647 |       1.03828
     -0.00911 |       0.00000 |       0.00309 |       0.00891 |       1.04372
     -0.01067 |       0.00000 |       0.00290 |       0.00905 |       1.04473
     -0.01233 |       0.00000 |       0.00262 |       0.00765 |       1.04665
     -0.01324 |       0.00000 |       0.00252 |       0.00809 |       1.04313
     -0.01432 |       0.00000 |       0.00246 |       0.01005 |       1.04755
     -0.01460 |       0.00000 |       0.00239 |       0.00901 |       1.05146
     -0.01567 |       0.00000 |       0.00239 |       0.00959 |       1.05031
     -0.01634 |       0.00000 |       0.00224 |       0.00960 |       1.04954
Evaluating losses...
     -0.01729 |       0.00000 |       0.00213 |       0.00962 |      

     -0.01635 |       0.00000 |       0.00213 |       0.00906 |       1.09904
     -0.01703 |       0.00000 |       0.00202 |       0.00969 |       1.10129
Evaluating losses...
     -0.01783 |       0.00000 |       0.00201 |       0.00991 |       1.09247
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.81        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1264         |
| TimeElapsed     | 1.94e+03     |
| TimestepsSoFar  | 1736704      |
| ev_tdlam_before | 0.37         |
| loss_ent        | 1.0924673    |
| loss_kl         | 0.009913886  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01783106  |
| loss_vf_loss    | 0.0020137234 |
----------------------------------
********** Iteration 424 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00010 |       0.00000 |       0.00059 |       0.00221 |       1.06327
     -0.00535 |       0.00000 |       0.00036 |

********** Iteration 429 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00046 |       0.00000 |       0.00214 |       0.00364 |       1.05312
     -0.00792 |       0.00000 |       0.00146 |       0.00616 |       1.05643
     -0.01219 |       0.00000 |       0.00140 |       0.00876 |       1.05429
     -0.01472 |       0.00000 |       0.00130 |       0.00780 |       1.05931
     -0.01653 |       0.00000 |       0.00127 |       0.00935 |       1.05921
     -0.01707 |       0.00000 |       0.00124 |       0.00889 |       1.06107
     -0.01802 |       0.00000 |       0.00119 |       0.00913 |       1.06148
     -0.01877 |       0.00000 |       0.00117 |       0.01039 |       1.06411
     -0.01960 |       0.00000 |       0.00119 |       0.00964 |       1.06260
     -0.02015 |       0.00000 |       0.00116 |       0.01032 |       1.06317
Evaluating losses...
     -0.02156 |       0.00000 |       0.00110 |       0.01059 |      

     -0.01313 |       0.00000 |       0.00116 |       0.00788 |       1.04242
     -0.01204 |       0.00000 |       0.00110 |       0.00988 |       1.04533
Evaluating losses...
     -0.01532 |       0.00000 |       0.00106 |       0.00961 |       1.04107
----------------------------------
| EpLenMean       | 3.03e+03     |
| EpRewMean       | -0.81        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1278         |
| TimeElapsed     | 2.12e+03     |
| TimestepsSoFar  | 1781760      |
| ev_tdlam_before | 0.234        |
| loss_ent        | 1.041071     |
| loss_kl         | 0.009610845  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015319368 |
| loss_vf_loss    | 0.0010604374 |
----------------------------------
********** Iteration 435 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00367 |       0.00000 |       0.00209 |       0.00995 |       1.01904
     -0.00472 |       0.00000 |       0.00136 |

********** Iteration 440 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00304 |       0.00000 |       0.00502 |       0.00508 |       1.03706
     -0.00489 |       0.00000 |       0.00469 |       0.00644 |       1.04052
     -0.00950 |       0.00000 |       0.00443 |       0.00760 |       1.03458
     -0.01178 |       0.00000 |       0.00429 |       0.00744 |       1.03542
     -0.01272 |       0.00000 |       0.00409 |       0.00852 |       1.03177
     -0.01282 |       0.00000 |       0.00397 |       0.00874 |       1.03528
     -0.01515 |       0.00000 |       0.00395 |       0.00987 |       1.03304
     -0.01365 |       0.00000 |       0.00384 |       0.01101 |       1.03391
     -0.01561 |       0.00000 |       0.00381 |       0.01049 |       1.03568
     -0.01716 |       0.00000 |       0.00372 |       0.01057 |       1.03492
Evaluating losses...
     -0.01754 |       0.00000 |       0.00366 |       0.01283 |      

     -0.01574 |       0.00000 |       0.00396 |       0.01575 |       1.00235
     -0.01632 |       0.00000 |       0.00391 |       0.01509 |       1.00427
Evaluating losses...
     -0.01696 |       0.00000 |       0.00375 |       0.01766 |       1.01379
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.69        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1293         |
| TimeElapsed     | 2.16e+03     |
| TimestepsSoFar  | 1826816      |
| ev_tdlam_before | 0.149        |
| loss_ent        | 1.0137867    |
| loss_kl         | 0.017662484  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016961891 |
| loss_vf_loss    | 0.0037473047 |
----------------------------------
********** Iteration 446 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00075 |       0.00000 |       0.00747 |       0.00501 |       0.97927
     -0.00648 |       0.00000 |       0.00641 |

********** Iteration 451 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00020 |       0.00000 |       0.00289 |       0.00287 |       1.06703
     -0.00607 |       0.00000 |       0.00240 |       0.00613 |       1.06797
     -0.00944 |       0.00000 |       0.00229 |       0.00861 |       1.06309
     -0.01000 |       0.00000 |       0.00217 |       0.00791 |       1.06180
     -0.01170 |       0.00000 |       0.00208 |       0.00875 |       1.07031
     -0.01394 |       0.00000 |       0.00204 |       0.00937 |       1.06893
     -0.01524 |       0.00000 |       0.00197 |       0.00835 |       1.07025
     -0.01538 |       0.00000 |       0.00198 |       0.01108 |       1.06949
     -0.01643 |       0.00000 |       0.00192 |       0.00942 |       1.07344
     -0.01805 |       0.00000 |       0.00189 |       0.00975 |       1.07230
Evaluating losses...
     -0.01923 |       0.00000 |       0.00179 |       0.00934 |      

     -0.01963 |       0.00000 |       0.00299 |       0.00984 |       1.05238
     -0.02013 |       0.00000 |       0.00297 |       0.01044 |       1.04985
Evaluating losses...
     -0.02035 |       0.00000 |       0.00283 |       0.01130 |       1.05560
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.73        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1308         |
| TimeElapsed     | 2.19e+03     |
| TimestepsSoFar  | 1871872      |
| ev_tdlam_before | 0.631        |
| loss_ent        | 1.0556       |
| loss_kl         | 0.011295282  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020345086 |
| loss_vf_loss    | 0.0028343627 |
----------------------------------
********** Iteration 457 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00270 |       0.00000 |       0.00226 |       0.00331 |       1.05978
     -0.00492 |       0.00000 |       0.00195 |

********** Iteration 462 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00207 |       0.00000 |       0.00762 |       0.00619 |       1.09310
     -0.00901 |       0.00000 |       0.00565 |       0.00932 |       1.09755
     -0.01173 |       0.00000 |       0.00402 |       0.00884 |       1.09651
     -0.01404 |       0.00000 |       0.00366 |       0.00823 |       1.09940
     -0.01576 |       0.00000 |       0.00353 |       0.01025 |       1.10068
     -0.01714 |       0.00000 |       0.00338 |       0.01073 |       1.10332
     -0.01826 |       0.00000 |       0.00336 |       0.01086 |       1.10268
     -0.01871 |       0.00000 |       0.00325 |       0.01240 |       1.10178
     -0.01939 |       0.00000 |       0.00316 |       0.01225 |       1.10213
     -0.02084 |       0.00000 |       0.00310 |       0.01275 |       1.10790
Evaluating losses...
     -0.02247 |       0.00000 |       0.00292 |       0.01331 |      

     -0.01565 |       0.00000 |       0.00242 |       0.00763 |       1.09679
     -0.01598 |       0.00000 |       0.00243 |       0.00855 |       1.09155
Evaluating losses...
     -0.01688 |       0.00000 |       0.00231 |       0.01104 |       1.09232
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.88        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1323         |
| TimeElapsed     | 2.23e+03     |
| TimestepsSoFar  | 1916928      |
| ev_tdlam_before | 0.448        |
| loss_ent        | 1.0923212    |
| loss_kl         | 0.011043576  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01687909  |
| loss_vf_loss    | 0.0023060327 |
----------------------------------
********** Iteration 468 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00270 |       0.00000 |       0.00050 |       0.00314 |       1.07778
     -0.00721 |       0.00000 |       0.00030 |

********** Iteration 473 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00071 |       0.00000 |       0.01070 |       0.00663 |       1.05283
     -0.00520 |       0.00000 |       0.00943 |       0.00647 |       1.04827
     -0.01088 |       0.00000 |       0.00890 |       0.00609 |       1.04748
     -0.01313 |       0.00000 |       0.00856 |       0.00732 |       1.04599
     -0.01487 |       0.00000 |       0.00835 |       0.00736 |       1.04548
     -0.01543 |       0.00000 |       0.00832 |       0.01093 |       1.04385
     -0.01397 |       0.00000 |       0.00823 |       0.01271 |       1.04512
     -0.01700 |       0.00000 |       0.00805 |       0.01080 |       1.04415
     -0.01714 |       0.00000 |       0.00809 |       0.01253 |       1.04450
     -0.01905 |       0.00000 |       0.00795 |       0.01192 |       1.04444
Evaluating losses...
     -0.01944 |       0.00000 |       0.00774 |       0.01687 |      

     -0.01344 |       0.00000 |       0.00115 |       0.00660 |       1.12662
     -0.01403 |       0.00000 |       0.00107 |       0.00742 |       1.12935
Evaluating losses...
     -0.01475 |       0.00000 |       0.00108 |       0.00786 |       1.13249
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.79        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1338         |
| TimeElapsed     | 2.26e+03     |
| TimestepsSoFar  | 1961984      |
| ev_tdlam_before | 0.197        |
| loss_ent        | 1.1324941    |
| loss_kl         | 0.007856791  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014751553 |
| loss_vf_loss    | 0.001079191  |
----------------------------------
********** Iteration 479 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00205 |       0.00000 |       0.00164 |       0.00342 |       1.11879
     -0.00385 |       0.00000 |       0.00133 |

********** Iteration 484 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00155 |       0.00000 |       0.00345 |       0.00652 |       1.10028
     -0.00951 |       0.00000 |       0.00289 |       0.00710 |       1.09232
     -0.01251 |       0.00000 |       0.00253 |       0.00623 |       1.09076
     -0.01385 |       0.00000 |       0.00238 |       0.00717 |       1.09261
     -0.01321 |       0.00000 |       0.00220 |       0.00907 |       1.09360
     -0.01534 |       0.00000 |       0.00218 |       0.00783 |       1.09590
     -0.01596 |       0.00000 |       0.00204 |       0.00853 |       1.09605
     -0.01716 |       0.00000 |       0.00198 |       0.00756 |       1.10062
     -0.01733 |       0.00000 |       0.00201 |       0.00829 |       1.09718
     -0.01722 |       0.00000 |       0.00190 |       0.00857 |       1.09842
Evaluating losses...
     -0.01926 |       0.00000 |       0.00184 |       0.00816 |      

     -0.01632 |       0.00000 |       0.00017 |       0.00739 |       1.11587
     -0.01834 |       0.00000 |       0.00016 |       0.00729 |       1.11692
     -0.01974 |       0.00000 |       0.00015 |       0.00774 |       1.11678
     -0.02000 |       0.00000 |       0.00015 |       0.00875 |       1.11678
Evaluating losses...
     -0.02328 |       0.00000 |       0.00015 |       0.00894 |       1.11705
-----------------------------------
| EpLenMean       | 3.03e+03      |
| EpRewMean       | -0.81         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1353          |
| TimeElapsed     | 2.44e+03      |
| TimestepsSoFar  | 2007040       |
| ev_tdlam_before | -1.2          |
| loss_ent        | 1.1170466     |
| loss_kl         | 0.0089384895  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.02328366   |
| loss_vf_loss    | 0.00014837044 |
-----------------------------------
********** Iteration 490 ************
Optimizing...
     pol_surr |    pol_entpen |  

********** Iteration 495 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00175 |       0.00000 |       0.00205 |       0.00205 |       1.16737
     -0.00445 |       0.00000 |       0.00158 |       0.00360 |       1.16716
     -0.00694 |       0.00000 |       0.00147 |       0.00535 |       1.16525
     -0.00828 |       0.00000 |       0.00146 |       0.00649 |       1.16455
     -0.01008 |       0.00000 |       0.00138 |       0.00613 |       1.16316
     -0.01224 |       0.00000 |       0.00141 |       0.00769 |       1.16327
     -0.01201 |       0.00000 |       0.00135 |       0.00908 |       1.16389
     -0.01359 |       0.00000 |       0.00134 |       0.00918 |       1.16101
     -0.01445 |       0.00000 |       0.00131 |       0.00910 |       1.16231
     -0.01448 |       0.00000 |       0.00130 |       0.00908 |       1.15988
Evaluating losses...
     -0.01551 |       0.00000 |       0.00124 |       0.00900 |      

     -0.01724 |       0.00000 |       0.00194 |       0.00827 |       1.08977
     -0.01839 |       0.00000 |       0.00193 |       0.00826 |       1.09038
Evaluating losses...
     -0.02019 |       0.00000 |       0.00187 |       0.00795 |       1.08851
----------------------------------
| EpLenMean       | 3.03e+03     |
| EpRewMean       | -0.75        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1368         |
| TimeElapsed     | 2.47e+03     |
| TimestepsSoFar  | 2052096      |
| ev_tdlam_before | 0.596        |
| loss_ent        | 1.0885056    |
| loss_kl         | 0.007947785  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020186039 |
| loss_vf_loss    | 0.0018720893 |
----------------------------------
********** Iteration 501 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00104 |       0.00000 |       0.00279 |       0.00425 |       1.14292
      0.00036 |       0.00000 |       0.00219 |

********** Iteration 506 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00151 |       0.00000 |       0.00277 |       0.00400 |       1.12541
     -0.00643 |       0.00000 |       0.00207 |       0.00703 |       1.12288
     -0.00819 |       0.00000 |       0.00189 |       0.00668 |       1.12395
     -0.01188 |       0.00000 |       0.00175 |       0.00689 |       1.12458
     -0.01315 |       0.00000 |       0.00171 |       0.00788 |       1.12753
     -0.01505 |       0.00000 |       0.00168 |       0.00729 |       1.12501
     -0.01579 |       0.00000 |       0.00160 |       0.00903 |       1.12696
     -0.01645 |       0.00000 |       0.00157 |       0.00857 |       1.12867
     -0.01767 |       0.00000 |       0.00155 |       0.01030 |       1.12922
     -0.01765 |       0.00000 |       0.00151 |       0.01023 |       1.12718
Evaluating losses...
     -0.02035 |       0.00000 |       0.00145 |       0.01071 |      

     -0.01929 |       0.00000 |       0.00145 |       0.00943 |       1.13074
     -0.01978 |       0.00000 |       0.00141 |       0.01152 |       1.13084
Evaluating losses...
     -0.02225 |       0.00000 |       0.00131 |       0.01099 |       1.13090
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.88        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1383         |
| TimeElapsed     | 2.5e+03      |
| TimestepsSoFar  | 2097152      |
| ev_tdlam_before | -0.076       |
| loss_ent        | 1.1309011    |
| loss_kl         | 0.010985829  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.02224896  |
| loss_vf_loss    | 0.0013113602 |
----------------------------------
********** Iteration 512 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00223 |       0.00000 |       0.00450 |       0.00683 |       1.13667
     -0.00549 |       0.00000 |       0.00370 |

********** Iteration 517 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00417 |       0.00000 |       0.00566 |       0.00698 |       1.15909
     -0.00741 |       0.00000 |       0.00483 |       0.01098 |       1.15358
     -0.01313 |       0.00000 |       0.00456 |       0.00871 |       1.16353
     -0.01506 |       0.00000 |       0.00429 |       0.00727 |       1.15663
     -0.01683 |       0.00000 |       0.00412 |       0.00765 |       1.16047
     -0.01792 |       0.00000 |       0.00400 |       0.00712 |       1.16336
     -0.01887 |       0.00000 |       0.00390 |       0.00763 |       1.16062
     -0.01949 |       0.00000 |       0.00377 |       0.00758 |       1.16160
     -0.02048 |       0.00000 |       0.00374 |       0.00801 |       1.16193
     -0.02061 |       0.00000 |       0.00363 |       0.00866 |       1.16118
Evaluating losses...
     -0.02197 |       0.00000 |       0.00340 |       0.00862 |      

     -0.01792 |       0.00000 |       0.00383 |       0.00962 |       1.16106
     -0.01880 |       0.00000 |       0.00374 |       0.00919 |       1.16178
     -0.01949 |       0.00000 |       0.00371 |       0.00905 |       1.15929
Evaluating losses...
     -0.01941 |       0.00000 |       0.00355 |       0.01065 |       1.15686
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.93        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1398         |
| TimeElapsed     | 2.54e+03     |
| TimestepsSoFar  | 2142208      |
| ev_tdlam_before | 0.651        |
| loss_ent        | 1.1568569    |
| loss_kl         | 0.010647737  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019414822 |
| loss_vf_loss    | 0.00355163   |
----------------------------------
********** Iteration 523 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.68e-05 |       0.00000 |       0.00607 |

********** Iteration 528 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00123 |       0.00000 |       0.00047 |       0.00451 |       1.17856
     -0.00695 |       0.00000 |       0.00027 |       0.00765 |       1.17416
     -0.01171 |       0.00000 |       0.00018 |       0.00844 |       1.17304
     -0.01317 |       0.00000 |       0.00014 |       0.00705 |       1.17420
     -0.01381 |       0.00000 |       0.00013 |       0.00936 |       1.17576
     -0.01648 |       0.00000 |       0.00011 |       0.00880 |       1.17539
     -0.01829 |       0.00000 |       0.00010 |       0.00878 |       1.17522
     -0.01976 |       0.00000 |      9.32e-05 |       0.00880 |       1.17611
     -0.01991 |       0.00000 |      8.83e-05 |       0.00965 |       1.17583
     -0.02131 |       0.00000 |      8.24e-05 |       0.01031 |       1.17868
Evaluating losses...
     -0.02339 |       0.00000 |      7.95e-05 |       0.00981 |      

     -0.01947 |       0.00000 |      5.12e-05 |       0.00899 |       1.20422
     -0.02026 |       0.00000 |      4.84e-05 |       0.01053 |       1.20843
Evaluating losses...
     -0.02309 |       0.00000 |      4.66e-05 |       0.00901 |       1.20616
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.78        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1413         |
| TimeElapsed     | 2.57e+03     |
| TimestepsSoFar  | 2187264      |
| ev_tdlam_before | -1.26        |
| loss_ent        | 1.206163     |
| loss_kl         | 0.009007411  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.023088679 |
| loss_vf_loss    | 4.657494e-05 |
----------------------------------
********** Iteration 534 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00149 |       0.00000 |       0.00319 |       0.00981 |       1.19081
     -0.00677 |       0.00000 |       0.00149 |

********** Iteration 539 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00109 |       0.00000 |       0.00180 |       0.00615 |       1.18978
     -0.00814 |       0.00000 |       0.00166 |       0.01207 |       1.19977
     -0.01134 |       0.00000 |       0.00162 |       0.01271 |       1.20043
     -0.01339 |       0.00000 |       0.00156 |       0.00595 |       1.19822
     -0.01453 |       0.00000 |       0.00148 |       0.00584 |       1.19627
     -0.01395 |       0.00000 |       0.00144 |       0.01462 |       1.19725
     -0.01577 |       0.00000 |       0.00142 |       0.00884 |       1.19713
     -0.01643 |       0.00000 |       0.00138 |       0.00904 |       1.19789
     -0.01655 |       0.00000 |       0.00129 |       0.00869 |       1.19429
     -0.01733 |       0.00000 |       0.00131 |       0.00935 |       1.19744
Evaluating losses...
     -0.01835 |       0.00000 |       0.00120 |       0.00888 |      

     -0.01824 |       0.00000 |       0.00175 |       0.00779 |       1.17820
     -0.01920 |       0.00000 |       0.00173 |       0.00803 |       1.18195
Evaluating losses...
     -0.02086 |       0.00000 |       0.00160 |       0.00936 |       1.18005
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.81        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1428         |
| TimeElapsed     | 2.6e+03      |
| TimestepsSoFar  | 2232320      |
| ev_tdlam_before | 0.382        |
| loss_ent        | 1.1800505    |
| loss_kl         | 0.009361584  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020863006 |
| loss_vf_loss    | 0.001603385  |
----------------------------------
********** Iteration 545 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00018 |       0.00000 |       0.00188 |       0.00357 |       1.18216
     -0.00636 |       0.00000 |       0.00157 |

********** Iteration 550 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00034 |       0.00000 |       0.00169 |       0.00185 |       1.13723
     -0.00673 |       0.00000 |       0.00099 |       0.00434 |       1.13624
     -0.00899 |       0.00000 |       0.00082 |       0.00524 |       1.14228
     -0.01080 |       0.00000 |       0.00076 |       0.00658 |       1.14151
     -0.01178 |       0.00000 |       0.00069 |       0.00770 |       1.14327
     -0.01360 |       0.00000 |       0.00068 |       0.00789 |       1.14241
     -0.01442 |       0.00000 |       0.00064 |       0.00901 |       1.14297
     -0.01450 |       0.00000 |       0.00061 |       0.01017 |       1.14499
     -0.01516 |       0.00000 |       0.00058 |       0.00902 |       1.14394
     -0.01584 |       0.00000 |       0.00057 |       0.01115 |       1.14184
Evaluating losses...
     -0.01831 |       0.00000 |       0.00054 |       0.01013 |      

     -0.01398 |       0.00000 |       0.00112 |       0.00926 |       1.14040
     -0.01427 |       0.00000 |       0.00107 |       0.00908 |       1.13971
     -0.01494 |       0.00000 |       0.00104 |       0.00991 |       1.14189
Evaluating losses...
     -0.01642 |       0.00000 |       0.00100 |       0.01033 |       1.14264
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.79        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1443         |
| TimeElapsed     | 2.77e+03     |
| TimestepsSoFar  | 2277376      |
| ev_tdlam_before | -0.309       |
| loss_ent        | 1.1426438    |
| loss_kl         | 0.010326482  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016423564 |
| loss_vf_loss    | 0.0010027614 |
----------------------------------
********** Iteration 556 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00636 |       0.00000 |       0.00240 |

********** Iteration 561 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00265 |       0.00000 |       0.00370 |       0.00499 |       1.12866
     -0.00335 |       0.00000 |       0.00272 |       0.00394 |       1.12331
     -0.00710 |       0.00000 |       0.00241 |       0.00582 |       1.11850
     -0.00852 |       0.00000 |       0.00227 |       0.00507 |       1.12489
     -0.01052 |       0.00000 |       0.00225 |       0.00553 |       1.12574
     -0.01178 |       0.00000 |       0.00209 |       0.00673 |       1.12536
     -0.01232 |       0.00000 |       0.00200 |       0.00697 |       1.12753
     -0.01227 |       0.00000 |       0.00199 |       0.00750 |       1.12752
     -0.01342 |       0.00000 |       0.00190 |       0.00782 |       1.12547
     -0.01359 |       0.00000 |       0.00186 |       0.00809 |       1.12404
Evaluating losses...
     -0.01581 |       0.00000 |       0.00180 |       0.00856 |      

     -0.02075 |       0.00000 |       0.00087 |       0.01204 |       1.12353
     -0.02185 |       0.00000 |       0.00087 |       0.01138 |       1.12326
Evaluating losses...
     -0.02386 |       0.00000 |       0.00087 |       0.01095 |       1.12895
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.73         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1458          |
| TimeElapsed     | 2.81e+03      |
| TimestepsSoFar  | 2322432       |
| ev_tdlam_before | -0.485        |
| loss_ent        | 1.1289533     |
| loss_kl         | 0.010952109   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0238649    |
| loss_vf_loss    | 0.00087084004 |
-----------------------------------
********** Iteration 567 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00017 |       0.00000 |       0.00303 |       0.00368 |       1.14725
     -0.00760 |       0.00000 |  

********** Iteration 572 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00162 |       0.00000 |       0.00437 |       0.00312 |       1.08953
     -0.00577 |       0.00000 |       0.00281 |       0.00601 |       1.08070
     -0.00691 |       0.00000 |       0.00254 |       0.00587 |       1.08501
     -0.00828 |       0.00000 |       0.00219 |       0.00554 |       1.08471
     -0.00975 |       0.00000 |       0.00200 |       0.00562 |       1.08754
     -0.01111 |       0.00000 |       0.00194 |       0.00562 |       1.08774
     -0.01205 |       0.00000 |       0.00187 |       0.00689 |       1.08713
     -0.01213 |       0.00000 |       0.00177 |       0.00711 |       1.08933
     -0.01325 |       0.00000 |       0.00176 |       0.00924 |       1.08100
     -0.01423 |       0.00000 |       0.00170 |       0.00813 |       1.08355
Evaluating losses...
     -0.01564 |       0.00000 |       0.00157 |       0.00783 |      

     -0.02233 |       0.00000 |       0.00386 |       0.00913 |       1.08042
     -0.02275 |       0.00000 |       0.00380 |       0.01039 |       1.07887
Evaluating losses...
     -0.02379 |       0.00000 |       0.00367 |       0.01022 |       1.07856
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.68        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1473         |
| TimeElapsed     | 2.84e+03     |
| TimestepsSoFar  | 2367488      |
| ev_tdlam_before | 0.577        |
| loss_ent        | 1.0785563    |
| loss_kl         | 0.01022268   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.023793723 |
| loss_vf_loss    | 0.0036731812 |
----------------------------------
********** Iteration 578 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00136 |       0.00000 |       0.00510 |       0.00366 |       1.08219
     -0.00497 |       0.00000 |       0.00445 |

********** Iteration 583 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00141 |       0.00000 |       0.00346 |       0.00585 |       1.08838
     -0.00701 |       0.00000 |       0.00278 |       0.00574 |       1.08498
     -0.00938 |       0.00000 |       0.00252 |       0.00584 |       1.08331
     -0.01213 |       0.00000 |       0.00235 |       0.00581 |       1.08314
     -0.01370 |       0.00000 |       0.00220 |       0.00560 |       1.08798
     -0.01583 |       0.00000 |       0.00212 |       0.00561 |       1.09200
     -0.01622 |       0.00000 |       0.00202 |       0.00597 |       1.09270
     -0.01770 |       0.00000 |       0.00194 |       0.00702 |       1.09630
     -0.01820 |       0.00000 |       0.00188 |       0.00799 |       1.09307
     -0.01752 |       0.00000 |       0.00183 |       0.01030 |       1.08588
Evaluating losses...
     -0.01879 |       0.00000 |       0.00175 |       0.01006 |      

     -0.01113 |       0.00000 |       0.00042 |       0.00946 |       1.10331
     -0.01228 |       0.00000 |       0.00041 |       0.00900 |       1.10646
Evaluating losses...
     -0.01303 |       0.00000 |       0.00036 |       0.00953 |       1.11328
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.65         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 1488          |
| TimeElapsed     | 2.87e+03      |
| TimestepsSoFar  | 2412544       |
| ev_tdlam_before | 0.565         |
| loss_ent        | 1.1132776     |
| loss_kl         | 0.009529284   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.013026264  |
| loss_vf_loss    | 0.00036413077 |
-----------------------------------
********** Iteration 589 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00018 |       0.00000 |       0.00020 |       0.00200 |       1.16072
     -0.00721 |       0.00000 |  

********** Iteration 594 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00925 |       0.00000 |       0.00212 |       0.00487 |       1.14559
     -0.00042 |       0.00000 |       0.00172 |       0.00924 |       1.13045
     -0.00607 |       0.00000 |       0.00140 |       0.00479 |       1.14330
     -0.00857 |       0.00000 |       0.00132 |       0.00572 |       1.14588
     -0.00932 |       0.00000 |       0.00126 |       0.00808 |       1.14466
     -0.01090 |       0.00000 |       0.00123 |       0.00867 |       1.14817
     -0.01226 |       0.00000 |       0.00121 |       0.00922 |       1.14797
     -0.01183 |       0.00000 |       0.00122 |       0.00892 |       1.14532
     -0.01369 |       0.00000 |       0.00112 |       0.01067 |       1.14707
     -0.01494 |       0.00000 |       0.00108 |       0.01013 |       1.14402
Evaluating losses...
     -0.01582 |       0.00000 |       0.00105 |       0.01119 |      

     -0.01713 |       0.00000 |       0.00243 |       0.01094 |       1.09514
     -0.01908 |       0.00000 |       0.00236 |       0.01283 |       1.09066
Evaluating losses...
     -0.01566 |       0.00000 |       0.00226 |       0.01257 |       1.08801
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.68        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1503         |
| TimeElapsed     | 2.91e+03     |
| TimestepsSoFar  | 2457600      |
| ev_tdlam_before | 0.502        |
| loss_ent        | 1.0880055    |
| loss_kl         | 0.012565449  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015657771 |
| loss_vf_loss    | 0.0022600994 |
----------------------------------
********** Iteration 600 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00346 |       0.00000 |       0.00896 |       0.00453 |       1.10394
     -0.00612 |       0.00000 |       0.00695 |

********** Iteration 605 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00321 |       0.00000 |       0.00367 |       0.00468 |       1.11685
     -0.00353 |       0.00000 |       0.00224 |       0.00389 |       1.12035
     -0.00542 |       0.00000 |       0.00203 |       0.00548 |       1.12639
     -0.00725 |       0.00000 |       0.00192 |       0.00588 |       1.12363
     -0.01039 |       0.00000 |       0.00187 |       0.00560 |       1.12792
     -0.01189 |       0.00000 |       0.00183 |       0.00648 |       1.12699
     -0.01278 |       0.00000 |       0.00181 |       0.00763 |       1.12492
     -0.01387 |       0.00000 |       0.00172 |       0.00755 |       1.12842
     -0.01397 |       0.00000 |       0.00171 |       0.00801 |       1.12563
     -0.01583 |       0.00000 |       0.00168 |       0.00747 |       1.12949
Evaluating losses...
     -0.01717 |       0.00000 |       0.00160 |       0.00735 |      

     -0.01457 |       0.00000 |       0.00180 |       0.00746 |       1.11374
     -0.01530 |       0.00000 |       0.00181 |       0.00860 |       1.11619
     -0.01654 |       0.00000 |       0.00173 |       0.00861 |       1.11427
     -0.01823 |       0.00000 |       0.00174 |       0.00886 |       1.11547
Evaluating losses...
     -0.01942 |       0.00000 |       0.00169 |       0.00897 |       1.11481
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.87        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1517         |
| TimeElapsed     | 3.11e+03     |
| TimestepsSoFar  | 2502656      |
| ev_tdlam_before | 0.71         |
| loss_ent        | 1.114807     |
| loss_kl         | 0.008972007  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019422082 |
| loss_vf_loss    | 0.0016866042 |
----------------------------------
********** Iteration 611 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |

********** Iteration 616 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00222 |       0.00000 |      8.88e-05 |       0.00514 |       1.16768
     -0.00545 |       0.00000 |      6.81e-05 |       0.00786 |       1.17454
     -0.01196 |       0.00000 |      6.07e-05 |       0.00636 |       1.16520
     -0.01538 |       0.00000 |      5.61e-05 |       0.00721 |       1.16927
     -0.01803 |       0.00000 |      5.29e-05 |       0.00675 |       1.16741
     -0.01987 |       0.00000 |      4.91e-05 |       0.00717 |       1.16755
     -0.02169 |       0.00000 |      4.63e-05 |       0.00855 |       1.16699
     -0.02314 |       0.00000 |      4.57e-05 |       0.00864 |       1.16551
     -0.02437 |       0.00000 |      4.38e-05 |       0.00962 |       1.16265
     -0.02555 |       0.00000 |      4.11e-05 |       0.01101 |       1.16518
Evaluating losses...
     -0.02784 |       0.00000 |      4.01e-05 |       0.01365 |      

     -0.01625 |       0.00000 |       0.00090 |       0.00958 |       1.15694
     -0.01627 |       0.00000 |       0.00088 |       0.00896 |       1.16029
     -0.01681 |       0.00000 |       0.00087 |       0.01030 |       1.15917
Evaluating losses...
     -0.01924 |       0.00000 |       0.00086 |       0.00897 |       1.15806
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.85        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1532         |
| TimeElapsed     | 3.14e+03     |
| TimestepsSoFar  | 2547712      |
| ev_tdlam_before | 0.285        |
| loss_ent        | 1.1580626    |
| loss_kl         | 0.0089669945 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019235894 |
| loss_vf_loss    | 0.0008577398 |
----------------------------------
********** Iteration 622 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00038 |       0.00000 |       0.00133 |

********** Iteration 627 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00261 |       0.00000 |       0.00067 |       0.00530 |       1.11654
     -0.00340 |       0.00000 |       0.00037 |       0.00550 |       1.12246
     -0.00722 |       0.00000 |       0.00028 |       0.00581 |       1.12510
     -0.00919 |       0.00000 |       0.00025 |       0.00536 |       1.12124
     -0.00922 |       0.00000 |       0.00023 |       0.00611 |       1.13198
     -0.01119 |       0.00000 |       0.00022 |       0.00668 |       1.12512
     -0.01157 |       0.00000 |       0.00021 |       0.00779 |       1.13057
     -0.01255 |       0.00000 |       0.00020 |       0.00759 |       1.13187
     -0.01331 |       0.00000 |       0.00019 |       0.00833 |       1.13465
     -0.01274 |       0.00000 |       0.00019 |       0.00916 |       1.13455
Evaluating losses...
     -0.01439 |       0.00000 |       0.00018 |       0.01050 |      

     -0.02093 |       0.00000 |       0.00011 |       0.00863 |       1.15400
     -0.02046 |       0.00000 |       0.00011 |       0.00983 |       1.15166
Evaluating losses...
     -0.02404 |       0.00000 |       0.00011 |       0.00859 |       1.14456
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.84        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1547         |
| TimeElapsed     | 3.18e+03     |
| TimestepsSoFar  | 2592768      |
| ev_tdlam_before | -0.524       |
| loss_ent        | 1.1445572    |
| loss_kl         | 0.00859057   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.024035191 |
| loss_vf_loss    | 0.0001077132 |
----------------------------------
********** Iteration 633 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00474 |       0.00000 |       0.00255 |       0.03223 |       1.10896
     -0.00385 |       0.00000 |       0.00101 |

********** Iteration 638 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00161 |       0.00000 |       0.00280 |       0.00371 |       1.09295
     -0.00536 |       0.00000 |       0.00200 |       0.00504 |       1.09673
     -0.00744 |       0.00000 |       0.00180 |       0.00803 |       1.10600
     -0.00806 |       0.00000 |       0.00168 |       0.00673 |       1.09913
     -0.00970 |       0.00000 |       0.00166 |       0.00598 |       1.10555
     -0.01069 |       0.00000 |       0.00158 |       0.00757 |       1.09835
     -0.01113 |       0.00000 |       0.00152 |       0.00905 |       1.10265
     -0.01282 |       0.00000 |       0.00146 |       0.00848 |       1.09838
     -0.01237 |       0.00000 |       0.00145 |       0.00790 |       1.10309
     -0.01306 |       0.00000 |       0.00141 |       0.00825 |       1.10095
Evaluating losses...
     -0.01495 |       0.00000 |       0.00143 |       0.00830 |      

     -0.01934 |       0.00000 |       0.00012 |       0.00792 |       1.07389
     -0.02008 |       0.00000 |       0.00011 |       0.00853 |       1.07528
Evaluating losses...
     -0.02217 |       0.00000 |       0.00011 |       0.00979 |       1.07574
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.85        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1562         |
| TimeElapsed     | 3.21e+03     |
| TimestepsSoFar  | 2637824      |
| ev_tdlam_before | -0.556       |
| loss_ent        | 1.0757354    |
| loss_kl         | 0.009789308  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022166906 |
| loss_vf_loss    | 0.0001078601 |
----------------------------------
********** Iteration 644 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00049 |       0.00000 |       0.00318 |       0.00277 |       1.10054
     -0.00627 |       0.00000 |       0.00266 |

********** Iteration 649 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00223 |       0.00000 |       0.00482 |       0.00381 |       1.07452
     -0.00516 |       0.00000 |       0.00395 |       0.00694 |       1.05835
     -0.00614 |       0.00000 |       0.00367 |       0.00570 |       1.05955
     -0.00980 |       0.00000 |       0.00358 |       0.00559 |       1.05912
     -0.01118 |       0.00000 |       0.00334 |       0.00687 |       1.05692
     -0.01155 |       0.00000 |       0.00323 |       0.00724 |       1.05417
     -0.01373 |       0.00000 |       0.00312 |       0.00794 |       1.05616
     -0.01355 |       0.00000 |       0.00301 |       0.00969 |       1.05040
     -0.01461 |       0.00000 |       0.00299 |       0.00886 |       1.05459
     -0.01629 |       0.00000 |       0.00289 |       0.00861 |       1.05558
Evaluating losses...
     -0.01619 |       0.00000 |       0.00276 |       0.01119 |      

     -0.01285 |       0.00000 |       0.00105 |       0.01067 |       1.09281
     -0.01401 |       0.00000 |       0.00098 |       0.00989 |       1.09243
     -0.01419 |       0.00000 |       0.00094 |       0.00961 |       1.09127
Evaluating losses...
     -0.01555 |       0.00000 |       0.00087 |       0.00933 |       1.09235
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.87        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1577         |
| TimeElapsed     | 3.25e+03     |
| TimestepsSoFar  | 2682880      |
| ev_tdlam_before | 0.196        |
| loss_ent        | 1.0923532    |
| loss_kl         | 0.009330668  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015546144 |
| loss_vf_loss    | 0.00086619   |
----------------------------------
********** Iteration 655 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00210 |       0.00000 |       0.00314 |

********** Iteration 660 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00343 |       0.00000 |       0.00171 |       0.01343 |       1.06436
     -0.00658 |       0.00000 |       0.00144 |       0.00729 |       1.07043
     -0.00859 |       0.00000 |       0.00136 |       0.00787 |       1.07164
     -0.01162 |       0.00000 |       0.00130 |       0.00763 |       1.07457
     -0.01323 |       0.00000 |       0.00124 |       0.00667 |       1.07806
     -0.01392 |       0.00000 |       0.00126 |       0.00773 |       1.07628
     -0.01541 |       0.00000 |       0.00120 |       0.00715 |       1.07703
     -0.01577 |       0.00000 |       0.00120 |       0.00883 |       1.07522
     -0.01537 |       0.00000 |       0.00115 |       0.00979 |       1.07004
     -0.01716 |       0.00000 |       0.00113 |       0.00883 |       1.07904
Evaluating losses...
     -0.01793 |       0.00000 |       0.00119 |       0.00757 |      

     -0.01592 |       0.00000 |       0.00143 |       0.00942 |       1.09298
     -0.01705 |       0.00000 |       0.00141 |       0.01014 |       1.09159
Evaluating losses...
     -0.01804 |       0.00000 |       0.00135 |       0.01077 |       1.09771
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.86        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1592         |
| TimeElapsed     | 3.28e+03     |
| TimestepsSoFar  | 2727936      |
| ev_tdlam_before | 0.59         |
| loss_ent        | 1.0977081    |
| loss_kl         | 0.010768518  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018035183 |
| loss_vf_loss    | 0.0013458548 |
----------------------------------
********** Iteration 666 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00063 |       0.00000 |       0.00240 |       0.00458 |       1.13597
     -0.00818 |       0.00000 |       0.00179 |

********** Iteration 671 ************
Eval num_timesteps=2748416, episode_reward=-0.12 +/- 0.85
Episode length: 3000.00 +/- 0.00
New best mean reward!
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00135 |       0.00000 |       0.00051 |       0.00175 |       1.07567
     -0.00561 |       0.00000 |       0.00028 |       0.00362 |       1.07393
     -0.00946 |       0.00000 |       0.00022 |       0.00519 |       1.07669
     -0.01092 |       0.00000 |       0.00019 |       0.00620 |       1.07571
     -0.01333 |       0.00000 |       0.00017 |       0.00663 |       1.07830
     -0.01362 |       0.00000 |       0.00015 |       0.00699 |       1.07669
     -0.01554 |       0.00000 |       0.00013 |       0.00686 |       1.07672
     -0.01606 |       0.00000 |       0.00012 |       0.00780 |       1.07686
     -0.01720 |       0.00000 |       0.00011 |       0.00831 |       1.07929
     -0.01806 |       0.00000 |       0.00011 |       0

     -0.00762 |       0.00000 |       0.00032 |       0.00461 |       1.06427
     -0.00767 |       0.00000 |       0.00030 |       0.00504 |       1.06505
     -0.00825 |       0.00000 |       0.00028 |       0.00547 |       1.06651
     -0.00861 |       0.00000 |       0.00030 |       0.00640 |       1.06502
Evaluating losses...
     -0.01009 |       0.00000 |       0.00026 |       0.00585 |       1.06877
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.78         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1607          |
| TimeElapsed     | 3.47e+03      |
| TimestepsSoFar  | 2772992       |
| ev_tdlam_before | 0.639         |
| loss_ent        | 1.0687695     |
| loss_kl         | 0.005846224   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.01008991   |
| loss_vf_loss    | 0.00025962712 |
-----------------------------------
********** Iteration 677 ************
Optimizing...
     pol_surr |    pol_entpen |  

********** Iteration 682 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00264 |       0.00000 |       0.00310 |       0.00144 |       1.11485
     -0.00344 |       0.00000 |       0.00162 |       0.00241 |       1.11103
     -0.00822 |       0.00000 |       0.00126 |       0.00373 |       1.10706
     -0.00848 |       0.00000 |       0.00103 |       0.00480 |       1.10719
     -0.00970 |       0.00000 |       0.00086 |       0.00553 |       1.10549
     -0.01180 |       0.00000 |       0.00083 |       0.00593 |       1.10252
     -0.01229 |       0.00000 |       0.00075 |       0.00599 |       1.10168
     -0.01288 |       0.00000 |       0.00074 |       0.00744 |       1.10360
     -0.01380 |       0.00000 |       0.00071 |       0.00763 |       1.10443
     -0.01428 |       0.00000 |       0.00068 |       0.00810 |       1.10507
Evaluating losses...
     -0.01556 |       0.00000 |       0.00065 |       0.00812 |      

     -0.01047 |       0.00000 |       0.00088 |       0.00763 |       1.08848
     -0.00988 |       0.00000 |       0.00084 |       0.00782 |       1.08497
Evaluating losses...
     -0.01016 |       0.00000 |       0.00079 |       0.00852 |       1.08651
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.67        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1622         |
| TimeElapsed     | 3.5e+03      |
| TimestepsSoFar  | 2818048      |
| ev_tdlam_before | 0.307        |
| loss_ent        | 1.086509     |
| loss_kl         | 0.008516241  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010158685 |
| loss_vf_loss    | 0.0007876449 |
----------------------------------
********** Iteration 688 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00217 |       0.00000 |       0.00209 |       0.00339 |       1.09679
     -0.00945 |       0.00000 |       0.00097 |

********** Iteration 693 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00039 |       0.00000 |       0.00049 |       0.00420 |       1.09200
     -0.00986 |       0.00000 |       0.00025 |       0.00584 |       1.09546
     -0.01441 |       0.00000 |       0.00018 |       0.00598 |       1.10235
     -0.01707 |       0.00000 |       0.00015 |       0.00696 |       1.10309
     -0.01975 |       0.00000 |       0.00013 |       0.00816 |       1.10687
     -0.02044 |       0.00000 |       0.00011 |       0.00816 |       1.10717
     -0.02200 |       0.00000 |       0.00010 |       0.01075 |       1.11270
     -0.02269 |       0.00000 |      9.82e-05 |       0.01206 |       1.10826
     -0.02353 |       0.00000 |      9.29e-05 |       0.01214 |       1.11548
     -0.02493 |       0.00000 |      8.67e-05 |       0.01228 |       1.11431
Evaluating losses...
     -0.02729 |       0.00000 |      8.18e-05 |       0.01250 |      

     -0.01747 |       0.00000 |       0.00021 |       0.00918 |       1.14893
     -0.01815 |       0.00000 |       0.00021 |       0.00970 |       1.14956
     -0.01817 |       0.00000 |       0.00020 |       0.01005 |       1.14720
Evaluating losses...
     -0.02027 |       0.00000 |       0.00020 |       0.01042 |       1.15030
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.67         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1637          |
| TimeElapsed     | 3.54e+03      |
| TimestepsSoFar  | 2863104       |
| ev_tdlam_before | -1.02         |
| loss_ent        | 1.1503016     |
| loss_kl         | 0.010423024   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.020273725  |
| loss_vf_loss    | 0.00019951277 |
-----------------------------------
********** Iteration 699 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00049 |       0.00000 |  

********** Iteration 704 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -3.39e-05 |       0.00000 |       0.00045 |       0.00583 |       1.08158
     -0.00661 |       0.00000 |       0.00026 |       0.00555 |       1.07859
     -0.01207 |       0.00000 |       0.00020 |       0.00569 |       1.08349
     -0.01338 |       0.00000 |       0.00017 |       0.00690 |       1.08455
     -0.01594 |       0.00000 |       0.00015 |       0.00735 |       1.08486
     -0.01693 |       0.00000 |       0.00014 |       0.00757 |       1.08577
     -0.01813 |       0.00000 |       0.00013 |       0.00877 |       1.08701
     -0.01949 |       0.00000 |       0.00012 |       0.00900 |       1.08540
     -0.01964 |       0.00000 |       0.00011 |       0.01027 |       1.08917
     -0.02103 |       0.00000 |       0.00011 |       0.01019 |       1.08883
Evaluating losses...
     -0.02273 |       0.00000 |       0.00010 |       0.01006 |      

     -0.02411 |       0.00000 |      2.02e-05 |       0.01088 |       1.06688
     -0.02597 |       0.00000 |      1.96e-05 |       0.01061 |       1.06382
Evaluating losses...
     -0.02958 |       0.00000 |      1.83e-05 |       0.01092 |       1.06638
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.63        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1652         |
| TimeElapsed     | 3.58e+03     |
| TimestepsSoFar  | 2908160      |
| ev_tdlam_before | -2.65        |
| loss_ent        | 1.0663751    |
| loss_kl         | 0.010920785  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.02957973  |
| loss_vf_loss    | 1.833425e-05 |
----------------------------------
********** Iteration 710 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00105 |       0.00000 |       0.00181 |       0.00136 |       1.04703
     -0.00698 |       0.00000 |       0.00097 |

********** Iteration 715 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00303 |       0.00000 |       0.00808 |       0.00873 |       1.12191
     -0.01103 |       0.00000 |       0.00582 |       0.01546 |       1.13143
     -0.01387 |       0.00000 |       0.00555 |       0.01279 |       1.13424
     -0.01625 |       0.00000 |       0.00499 |       0.01328 |       1.13471
     -0.01707 |       0.00000 |       0.00469 |       0.01558 |       1.13613
     -0.01819 |       0.00000 |       0.00421 |       0.01253 |       1.13285
     -0.01861 |       0.00000 |       0.00415 |       0.01199 |       1.13129
     -0.01968 |       0.00000 |       0.00387 |       0.01357 |       1.13560
     -0.02044 |       0.00000 |       0.00385 |       0.01414 |       1.13408
     -0.02042 |       0.00000 |       0.00372 |       0.01342 |       1.13209
Evaluating losses...
     -0.02192 |       0.00000 |       0.00361 |       0.01285 |      

     -0.01592 |       0.00000 |       0.00124 |       0.00845 |       1.08817
     -0.01737 |       0.00000 |       0.00121 |       0.00983 |       1.08581
Evaluating losses...
     -0.01873 |       0.00000 |       0.00117 |       0.00943 |       1.08660
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.49        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1667         |
| TimeElapsed     | 3.61e+03     |
| TimestepsSoFar  | 2953216      |
| ev_tdlam_before | 0.63         |
| loss_ent        | 1.0865989    |
| loss_kl         | 0.009426712  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018729154 |
| loss_vf_loss    | 0.0011740687 |
----------------------------------
********** Iteration 721 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00151 |       0.00000 |       0.00012 |       0.00234 |       1.06073
     -0.00811 |       0.00000 |      8.59e-05 |

********** Iteration 726 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00091 |       0.00000 |       0.00325 |       0.00622 |       1.04805
     -0.00729 |       0.00000 |       0.00277 |       0.00563 |       1.05722
     -0.00981 |       0.00000 |       0.00257 |       0.00759 |       1.05012
     -0.01174 |       0.00000 |       0.00240 |       0.00914 |       1.04474
     -0.01367 |       0.00000 |       0.00228 |       0.00664 |       1.05313
     -0.01312 |       0.00000 |       0.00221 |       0.00801 |       1.05208
     -0.01555 |       0.00000 |       0.00211 |       0.00758 |       1.05267
     -0.01678 |       0.00000 |       0.00208 |       0.00815 |       1.05774
     -0.01782 |       0.00000 |       0.00195 |       0.00906 |       1.06139
     -0.01653 |       0.00000 |       0.00190 |       0.01036 |       1.05688
Evaluating losses...
     -0.01854 |       0.00000 |       0.00189 |       0.01041 |      

     -0.01354 |       0.00000 |       0.00248 |       0.00864 |       1.10365
     -0.01323 |       0.00000 |       0.00247 |       0.00981 |       1.10679
Evaluating losses...
     -0.01545 |       0.00000 |       0.00240 |       0.00867 |       1.10604
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.5         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1682         |
| TimeElapsed     | 3.65e+03     |
| TimestepsSoFar  | 2998272      |
| ev_tdlam_before | 0.427        |
| loss_ent        | 1.1060373    |
| loss_kl         | 0.008671435  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01545223  |
| loss_vf_loss    | 0.0023958078 |
----------------------------------
********** Iteration 732 ************
Eval num_timesteps=2998272, episode_reward=-0.45 +/- 0.91
Episode length: 3000.00 +/- 0.00
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00366 |       0.00000 |   

********** Iteration 737 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00156 |       0.00000 |       0.00417 |       0.00227 |       1.14704
     -0.00579 |       0.00000 |       0.00355 |       0.00459 |       1.14750
     -0.00888 |       0.00000 |       0.00323 |       0.00431 |       1.14487
     -0.01073 |       0.00000 |       0.00303 |       0.00597 |       1.14121
     -0.01241 |       0.00000 |       0.00290 |       0.00593 |       1.14496
     -0.01330 |       0.00000 |       0.00286 |       0.00600 |       1.14718
     -0.01407 |       0.00000 |       0.00274 |       0.00675 |       1.14522
     -0.01545 |       0.00000 |       0.00265 |       0.00724 |       1.14638
     -0.01611 |       0.00000 |       0.00260 |       0.00791 |       1.14759
     -0.01708 |       0.00000 |       0.00251 |       0.00797 |       1.14941
Evaluating losses...
     -0.01830 |       0.00000 |       0.00241 |       0.00863 |      

     -0.01243 |       0.00000 |       0.00051 |       0.00757 |       1.09747
     -0.01315 |       0.00000 |       0.00049 |       0.00804 |       1.09541
     -0.01420 |       0.00000 |       0.00049 |       0.00842 |       1.09726
Evaluating losses...
     -0.01584 |       0.00000 |       0.00045 |       0.00859 |       1.09613
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.38        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1697         |
| TimeElapsed     | 3.83e+03     |
| TimestepsSoFar  | 3043328      |
| ev_tdlam_before | 0.407        |
| loss_ent        | 1.0961263    |
| loss_kl         | 0.008585748  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015838679 |
| loss_vf_loss    | 0.0004471231 |
----------------------------------
********** Iteration 743 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00063 |       0.00000 |       0.00678 |

********** Iteration 748 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00064 |       0.00000 |       0.00936 |       0.00505 |       1.15336
     -0.00882 |       0.00000 |       0.00816 |       0.00491 |       1.14842
     -0.01066 |       0.00000 |       0.00745 |       0.00598 |       1.14424
     -0.01275 |       0.00000 |       0.00689 |       0.00592 |       1.14814
     -0.01399 |       0.00000 |       0.00656 |       0.00680 |       1.15139
     -0.01497 |       0.00000 |       0.00636 |       0.00731 |       1.14417
     -0.01622 |       0.00000 |       0.00619 |       0.00858 |       1.14578
     -0.01702 |       0.00000 |       0.00595 |       0.00771 |       1.14690
     -0.01803 |       0.00000 |       0.00582 |       0.00778 |       1.14555
     -0.01836 |       0.00000 |       0.00570 |       0.00913 |       1.14767
Evaluating losses...
     -0.02010 |       0.00000 |       0.00550 |       0.00952 |      

     -0.01236 |       0.00000 |       0.00101 |       0.00654 |       1.20594
     -0.01292 |       0.00000 |       0.00100 |       0.00704 |       1.21131
Evaluating losses...
     -0.01359 |       0.00000 |       0.00093 |       0.00762 |       1.20552
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.44        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1712         |
| TimeElapsed     | 3.87e+03     |
| TimestepsSoFar  | 3088384      |
| ev_tdlam_before | 0.532        |
| loss_ent        | 1.2055234    |
| loss_kl         | 0.007616293  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013587961 |
| loss_vf_loss    | 0.0009338209 |
----------------------------------
********** Iteration 754 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00013 |       0.00000 |       0.00351 |       0.00472 |       1.19696
     -0.00363 |       0.00000 |       0.00183 |

********** Iteration 759 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00339 |       0.00000 |       0.00414 |       0.00213 |       1.19203
     -0.00588 |       0.00000 |       0.00379 |       0.00564 |       1.20562
     -0.01063 |       0.00000 |       0.00356 |       0.00609 |       1.20495
     -0.01064 |       0.00000 |       0.00342 |       0.00765 |       1.20024
     -0.01333 |       0.00000 |       0.00329 |       0.00953 |       1.20809
     -0.01460 |       0.00000 |       0.00314 |       0.00736 |       1.19809
     -0.01551 |       0.00000 |       0.00306 |       0.00947 |       1.20005
     -0.01534 |       0.00000 |       0.00288 |       0.00954 |       1.19761
     -0.01691 |       0.00000 |       0.00281 |       0.00936 |       1.19757
     -0.01670 |       0.00000 |       0.00278 |       0.01065 |       1.19762
Evaluating losses...
     -0.01868 |       0.00000 |       0.00295 |       0.01123 |      

     -0.01476 |       0.00000 |       0.00162 |       0.00711 |       1.14894
     -0.01515 |       0.00000 |       0.00163 |       0.00732 |       1.14852
     -0.01578 |       0.00000 |       0.00162 |       0.00707 |       1.14780
Evaluating losses...
     -0.01687 |       0.00000 |       0.00154 |       0.00717 |       1.14507
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.39        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1727         |
| TimeElapsed     | 3.9e+03      |
| TimestepsSoFar  | 3133440      |
| ev_tdlam_before | 0.677        |
| loss_ent        | 1.1450708    |
| loss_kl         | 0.0071682287 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016865622 |
| loss_vf_loss    | 0.0015400577 |
----------------------------------
********** Iteration 765 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00012 |       0.00000 |       0.00126 |

********** Iteration 770 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00213 |       0.00000 |       0.00282 |       0.00862 |       1.13879
     -0.00737 |       0.00000 |       0.00224 |       0.00668 |       1.13556
     -0.01066 |       0.00000 |       0.00207 |       0.00694 |       1.13268
     -0.01238 |       0.00000 |       0.00193 |       0.00818 |       1.12720
     -0.01298 |       0.00000 |       0.00179 |       0.00844 |       1.12669
     -0.01386 |       0.00000 |       0.00175 |       0.00841 |       1.12464
     -0.01408 |       0.00000 |       0.00164 |       0.00819 |       1.12117
     -0.01481 |       0.00000 |       0.00154 |       0.01046 |       1.12049
     -0.01523 |       0.00000 |       0.00149 |       0.00976 |       1.11967
     -0.01543 |       0.00000 |       0.00146 |       0.01049 |       1.11661
Evaluating losses...
     -0.01637 |       0.00000 |       0.00142 |       0.00921 |      

     -0.01857 |       0.00000 |       0.00226 |       0.01593 |       1.15850
     -0.01931 |       0.00000 |       0.00221 |       0.01518 |       1.15819
Evaluating losses...
     -0.02103 |       0.00000 |       0.00217 |       0.01565 |       1.15980
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.44        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1742         |
| TimeElapsed     | 3.94e+03     |
| TimestepsSoFar  | 3178496      |
| ev_tdlam_before | 0.478        |
| loss_ent        | 1.1598018    |
| loss_kl         | 0.015648182  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.021030135 |
| loss_vf_loss    | 0.0021729812 |
----------------------------------
********** Iteration 776 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00081 |       0.00000 |       0.00107 |       0.00213 |       1.12114
     -0.00589 |       0.00000 |       0.00065 |

********** Iteration 781 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00579 |       0.00000 |       0.00086 |       0.00592 |       1.09563
     -0.00107 |       0.00000 |       0.00047 |       0.00487 |       1.10038
     -0.00508 |       0.00000 |       0.00044 |       0.00388 |       1.08872
     -0.00595 |       0.00000 |       0.00041 |       0.00458 |       1.09257
     -0.00733 |       0.00000 |       0.00042 |       0.00454 |       1.08939
     -0.00692 |       0.00000 |       0.00040 |       0.00601 |       1.09548
     -0.00924 |       0.00000 |       0.00038 |       0.00583 |       1.08791
     -0.00916 |       0.00000 |       0.00037 |       0.00637 |       1.08939
     -0.01116 |       0.00000 |       0.00036 |       0.00647 |       1.08752
     -0.00954 |       0.00000 |       0.00036 |       0.00779 |       1.08769
Evaluating losses...
     -0.01273 |       0.00000 |       0.00033 |       0.00698 |      

     -0.00896 |       0.00000 |       0.00047 |       0.00652 |       1.09146
     -0.00955 |       0.00000 |       0.00046 |       0.00741 |       1.08765
     -0.01042 |       0.00000 |       0.00045 |       0.00869 |       1.08750
Evaluating losses...
     -0.01243 |       0.00000 |       0.00041 |       0.00857 |       1.08982
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.43         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1757          |
| TimeElapsed     | 3.97e+03      |
| TimestepsSoFar  | 3223552       |
| ev_tdlam_before | 0.653         |
| loss_ent        | 1.0898211     |
| loss_kl         | 0.008570614   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.01242738   |
| loss_vf_loss    | 0.00040773014 |
-----------------------------------
********** Iteration 787 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00147 |       0.00000 |  

********** Iteration 792 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00048 |       0.00000 |       0.00439 |       0.00392 |       1.10178
     -0.00487 |       0.00000 |       0.00423 |       0.01140 |       1.08688
     -0.01070 |       0.00000 |       0.00402 |       0.01073 |       1.08559
     -0.01165 |       0.00000 |       0.00390 |       0.00964 |       1.09156
     -0.01401 |       0.00000 |       0.00386 |       0.01060 |       1.09737
     -0.01489 |       0.00000 |       0.00381 |       0.00919 |       1.09798
     -0.01490 |       0.00000 |       0.00376 |       0.01124 |       1.09800
     -0.01579 |       0.00000 |       0.00373 |       0.01026 |       1.10384
     -0.01687 |       0.00000 |       0.00366 |       0.01128 |       1.10216
     -0.01751 |       0.00000 |       0.00360 |       0.01101 |       1.10329
Evaluating losses...
     -0.01820 |       0.00000 |       0.00346 |       0.01259 |      

     -0.02090 |       0.00000 |       0.00011 |       0.00736 |       1.12305
     -0.02098 |       0.00000 |       0.00010 |       0.00819 |       1.11951
     -0.02287 |       0.00000 |      9.78e-05 |       0.00862 |       1.12010
Evaluating losses...
     -0.02483 |       0.00000 |      9.37e-05 |       0.00880 |       1.12597
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.62        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1772         |
| TimeElapsed     | 4.16e+03     |
| TimestepsSoFar  | 3268608      |
| ev_tdlam_before | -2.68        |
| loss_ent        | 1.1259723    |
| loss_kl         | 0.008797273  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.024829026 |
| loss_vf_loss    | 9.365356e-05 |
----------------------------------
********** Iteration 798 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00074 |       0.00000 |       0.00020 |

********** Iteration 803 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00044 |       0.00000 |       0.00022 |       0.00355 |       1.13795
     -0.01173 |       0.00000 |       0.00012 |       0.00636 |       1.12994
     -0.01533 |       0.00000 |      9.64e-05 |       0.00627 |       1.12759
     -0.01760 |       0.00000 |      8.02e-05 |       0.00815 |       1.12781
     -0.01939 |       0.00000 |      7.13e-05 |       0.00835 |       1.13341
     -0.02137 |       0.00000 |      6.56e-05 |       0.00850 |       1.12923
     -0.02274 |       0.00000 |      6.06e-05 |       0.00917 |       1.12788
     -0.02279 |       0.00000 |      5.75e-05 |       0.01094 |       1.12740
     -0.02369 |       0.00000 |      5.49e-05 |       0.01177 |       1.13349
     -0.02479 |       0.00000 |      5.16e-05 |       0.01228 |       1.13297
Evaluating losses...
     -0.02750 |       0.00000 |      4.95e-05 |       0.01200 |      

     -0.01475 |       0.00000 |       0.00149 |       0.00856 |       1.09474
     -0.01527 |       0.00000 |       0.00144 |       0.01578 |       1.09696
     -0.01681 |       0.00000 |       0.00146 |       0.01237 |       1.09814
Evaluating losses...
     -0.01750 |       0.00000 |       0.00136 |       0.01210 |       1.09345
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.53        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1787         |
| TimeElapsed     | 4.2e+03      |
| TimestepsSoFar  | 3313664      |
| ev_tdlam_before | 0.625        |
| loss_ent        | 1.0934519    |
| loss_kl         | 0.01210033   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01750022  |
| loss_vf_loss    | 0.0013596246 |
----------------------------------
********** Iteration 809 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00023 |       0.00000 |       0.00236 |

********** Iteration 814 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00099 |       0.00000 |       0.00398 |       0.00288 |       1.12879
     -0.00053 |       0.00000 |       0.00307 |       0.00589 |       1.12898
     -0.00897 |       0.00000 |       0.00276 |       0.00674 |       1.12860
     -0.01037 |       0.00000 |       0.00257 |       0.00597 |       1.14154
     -0.01243 |       0.00000 |       0.00241 |       0.00609 |       1.13570
     -0.01311 |       0.00000 |       0.00231 |       0.00684 |       1.13628
     -0.01442 |       0.00000 |       0.00222 |       0.00705 |       1.13723
     -0.01563 |       0.00000 |       0.00213 |       0.00771 |       1.13880
     -0.01522 |       0.00000 |       0.00208 |       0.00813 |       1.13259
     -0.01613 |       0.00000 |       0.00204 |       0.00832 |       1.13731
Evaluating losses...
     -0.01828 |       0.00000 |       0.00194 |       0.00848 |      

     -0.01837 |       0.00000 |       0.00489 |       0.00779 |       1.09817
     -0.01818 |       0.00000 |       0.00480 |       0.00848 |       1.09272
Evaluating losses...
     -0.01913 |       0.00000 |       0.00463 |       0.00890 |       1.09724
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.48        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1802         |
| TimeElapsed     | 4.23e+03     |
| TimestepsSoFar  | 3358720      |
| ev_tdlam_before | 0.385        |
| loss_ent        | 1.0972369    |
| loss_kl         | 0.008899353  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019126534 |
| loss_vf_loss    | 0.004630036  |
----------------------------------
********** Iteration 820 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00177 |       0.00000 |       0.00293 |       0.00278 |       1.10123
     -0.00568 |       0.00000 |       0.00273 |

********** Iteration 825 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00095 |       0.00000 |       0.00356 |       0.00363 |       1.06244
     3.45e-05 |       0.00000 |       0.00229 |       0.01030 |       1.05753
     -0.00590 |       0.00000 |       0.00187 |       0.00741 |       1.05990
     -0.00949 |       0.00000 |       0.00172 |       0.00824 |       1.06389
     -0.01171 |       0.00000 |       0.00167 |       0.00751 |       1.06408
     -0.01311 |       0.00000 |       0.00162 |       0.00912 |       1.06209
     -0.01344 |       0.00000 |       0.00157 |       0.01058 |       1.06459
     -0.01508 |       0.00000 |       0.00149 |       0.01123 |       1.06297
     -0.01593 |       0.00000 |       0.00147 |       0.01181 |       1.06497
     -0.01644 |       0.00000 |       0.00147 |       0.01163 |       1.06270
Evaluating losses...
     -0.01760 |       0.00000 |       0.00143 |       0.01112 |      

     -0.01307 |       0.00000 |       0.00018 |       0.00916 |       1.12019
     -0.01397 |       0.00000 |       0.00017 |       0.00911 |       1.11696
Evaluating losses...
     -0.01495 |       0.00000 |       0.00018 |       0.00918 |       1.11796
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.59        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1817         |
| TimeElapsed     | 4.27e+03     |
| TimestepsSoFar  | 3403776      |
| ev_tdlam_before | -3.18        |
| loss_ent        | 1.1179581    |
| loss_kl         | 0.0091781495 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01494834  |
| loss_vf_loss    | 0.0001833931 |
----------------------------------
********** Iteration 831 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00153 |       0.00000 |       0.00211 |       0.00404 |       1.11894
     -0.00819 |       0.00000 |       0.00182 |

********** Iteration 836 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00218 |       0.00000 |       0.00655 |       0.00484 |       1.09384
     -0.00489 |       0.00000 |       0.00532 |       0.00677 |       1.09314
     -0.00784 |       0.00000 |       0.00468 |       0.00490 |       1.09703
     -0.01015 |       0.00000 |       0.00413 |       0.00715 |       1.09750
     -0.01132 |       0.00000 |       0.00372 |       0.00798 |       1.09597
     -0.01200 |       0.00000 |       0.00361 |       0.01002 |       1.09622
     -0.01307 |       0.00000 |       0.00346 |       0.00909 |       1.09512
     -0.01388 |       0.00000 |       0.00342 |       0.00920 |       1.09795
     -0.01411 |       0.00000 |       0.00332 |       0.01074 |       1.09484
     -0.01393 |       0.00000 |       0.00326 |       0.01177 |       1.09719
Evaluating losses...
     -0.01594 |       0.00000 |       0.00315 |       0.01196 |      

     -0.01554 |       0.00000 |       0.00192 |       0.00993 |       1.07234
     -0.01642 |       0.00000 |       0.00194 |       0.01172 |       1.07169
Evaluating losses...
     -0.01787 |       0.00000 |       0.00185 |       0.01175 |       1.06984
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.66        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1832         |
| TimeElapsed     | 4.31e+03     |
| TimestepsSoFar  | 3448832      |
| ev_tdlam_before | 0.46         |
| loss_ent        | 1.0698416    |
| loss_kl         | 0.011746792  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.017867528 |
| loss_vf_loss    | 0.001852518  |
----------------------------------
********** Iteration 842 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00114 |       0.00000 |       0.00626 |       0.00206 |       1.04243
     -0.00590 |       0.00000 |       0.00457 |

********** Iteration 847 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00292 |       0.00000 |       0.00451 |       0.00435 |       1.08690
     -0.00557 |       0.00000 |       0.00379 |       0.00723 |       1.07816
     -0.00861 |       0.00000 |       0.00338 |       0.00626 |       1.09094
     -0.01075 |       0.00000 |       0.00327 |       0.00706 |       1.09033
     -0.01244 |       0.00000 |       0.00314 |       0.00823 |       1.09141
     -0.01320 |       0.00000 |       0.00313 |       0.00968 |       1.09539
     -0.01407 |       0.00000 |       0.00304 |       0.00933 |       1.09470
     -0.01582 |       0.00000 |       0.00299 |       0.00927 |       1.09621
     -0.01643 |       0.00000 |       0.00303 |       0.01008 |       1.09287
     -0.01569 |       0.00000 |       0.00289 |       0.01109 |       1.09516
Evaluating losses...
     -0.01690 |       0.00000 |       0.00283 |       0.01242 |      

     -0.00884 |       0.00000 |       0.00079 |       0.00719 |       1.12522
     -0.01015 |       0.00000 |       0.00078 |       0.00719 |       1.12554
     -0.01075 |       0.00000 |       0.00074 |       0.00740 |       1.12770
Evaluating losses...
     -0.01173 |       0.00000 |       0.00068 |       0.00777 |       1.12870
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.65        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1847         |
| TimeElapsed     | 4.34e+03     |
| TimestepsSoFar  | 3493888      |
| ev_tdlam_before | 0.183        |
| loss_ent        | 1.1287031    |
| loss_kl         | 0.0077728075 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.011732781 |
| loss_vf_loss    | 0.0006821985 |
----------------------------------
********** Iteration 853 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00145 |       0.00000 |       0.00082 |

********** Iteration 858 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00059 |       0.00000 |       0.00587 |       0.00436 |       1.09577
     -0.00864 |       0.00000 |       0.00400 |       0.00385 |       1.09078
     -0.01029 |       0.00000 |       0.00364 |       0.00588 |       1.09410
     -0.01316 |       0.00000 |       0.00333 |       0.00739 |       1.09627
     -0.01348 |       0.00000 |       0.00319 |       0.01233 |       1.10124
     -0.01447 |       0.00000 |       0.00301 |       0.00950 |       1.09607
     -0.01635 |       0.00000 |       0.00291 |       0.00852 |       1.09569
     -0.01650 |       0.00000 |       0.00279 |       0.00943 |       1.09581
     -0.01803 |       0.00000 |       0.00284 |       0.00940 |       1.09511
     -0.01821 |       0.00000 |       0.00271 |       0.01022 |       1.09544
Evaluating losses...
     -0.01891 |       0.00000 |       0.00250 |       0.01230 |      

     -0.01372 |       0.00000 |       0.00051 |       0.00873 |       1.06235
     -0.01479 |       0.00000 |       0.00049 |       0.00861 |       1.06091
Evaluating losses...
     -0.01558 |       0.00000 |       0.00047 |       0.00832 |       1.05868
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.59        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1861         |
| TimeElapsed     | 4.53e+03     |
| TimestepsSoFar  | 3538944      |
| ev_tdlam_before | 0.199        |
| loss_ent        | 1.0586777    |
| loss_kl         | 0.008323094  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015578665 |
| loss_vf_loss    | 0.000465026  |
----------------------------------
********** Iteration 864 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00122 |       0.00000 |       0.00377 |       0.00470 |       1.08212
     -0.00553 |       0.00000 |       0.00349 |

********** Iteration 869 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00345 |       0.00000 |       0.00380 |       0.00817 |       1.06245
     -0.00524 |       0.00000 |       0.00305 |       0.01104 |       1.06896
     -0.00771 |       0.00000 |       0.00281 |       0.00929 |       1.07711
     -0.00927 |       0.00000 |       0.00264 |       0.01052 |       1.07700
     -0.01011 |       0.00000 |       0.00250 |       0.01070 |       1.07417
     -0.01267 |       0.00000 |       0.00238 |       0.01070 |       1.07597
     -0.01343 |       0.00000 |       0.00231 |       0.01116 |       1.07780
     -0.01285 |       0.00000 |       0.00225 |       0.01293 |       1.08286
     -0.01462 |       0.00000 |       0.00224 |       0.01344 |       1.08145
     -0.01459 |       0.00000 |       0.00220 |       0.01422 |       1.08254
Evaluating losses...
     -0.01148 |       0.00000 |       0.00212 |       0.02215 |      

     -0.01673 |       0.00000 |       0.00512 |       0.00838 |       1.09153
     -0.01715 |       0.00000 |       0.00492 |       0.00887 |       1.08834
Evaluating losses...
     -0.01915 |       0.00000 |       0.00471 |       0.00848 |       1.08936
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.63        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1877         |
| TimeElapsed     | 4.57e+03     |
| TimestepsSoFar  | 3584000      |
| ev_tdlam_before | 0.458        |
| loss_ent        | 1.0893606    |
| loss_kl         | 0.0084785875 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019146983 |
| loss_vf_loss    | 0.0047115134 |
----------------------------------
********** Iteration 875 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00102 |       0.00000 |       0.00042 |       0.00298 |       1.10417
     -0.00628 |       0.00000 |       0.00025 |

********** Iteration 880 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00017 |       0.00000 |       0.00266 |       0.00589 |       1.08132
     -0.00597 |       0.00000 |       0.00216 |       0.00438 |       1.07854
     -0.00865 |       0.00000 |       0.00196 |       0.00558 |       1.08370
     -0.01229 |       0.00000 |       0.00184 |       0.00511 |       1.07772
     -0.01267 |       0.00000 |       0.00176 |       0.00520 |       1.08016
     -0.01413 |       0.00000 |       0.00169 |       0.00637 |       1.07882
     -0.01588 |       0.00000 |       0.00169 |       0.00710 |       1.07859
     -0.01645 |       0.00000 |       0.00166 |       0.00693 |       1.07634
     -0.01637 |       0.00000 |       0.00160 |       0.00927 |       1.07820
     -0.01728 |       0.00000 |       0.00160 |       0.00814 |       1.07776
Evaluating losses...
     -0.01925 |       0.00000 |       0.00153 |       0.00866 |      

     -0.01407 |       0.00000 |       0.00131 |       0.00806 |       1.08317
     -0.01473 |       0.00000 |       0.00128 |       0.00919 |       1.08501
Evaluating losses...
     -0.01649 |       0.00000 |       0.00123 |       0.00860 |       1.08600
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.76        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1892         |
| TimeElapsed     | 4.61e+03     |
| TimestepsSoFar  | 3629056      |
| ev_tdlam_before | 0.212        |
| loss_ent        | 1.0859985    |
| loss_kl         | 0.008597129  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016488727 |
| loss_vf_loss    | 0.0012298165 |
----------------------------------
********** Iteration 886 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00097 |       0.00000 |       0.00536 |       0.00525 |       1.07310
     -0.00233 |       0.00000 |       0.00447 |

********** Iteration 891 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00100 |       0.00000 |       0.00468 |       0.00538 |       1.05809
     -0.00907 |       0.00000 |       0.00401 |       0.00499 |       1.05734
     -0.01100 |       0.00000 |       0.00368 |       0.00728 |       1.06284
     -0.01318 |       0.00000 |       0.00349 |       0.00705 |       1.06176
     -0.01505 |       0.00000 |       0.00336 |       0.00706 |       1.05792
     -0.01569 |       0.00000 |       0.00319 |       0.00859 |       1.05881
     -0.01723 |       0.00000 |       0.00305 |       0.00886 |       1.05868
     -0.01739 |       0.00000 |       0.00293 |       0.00975 |       1.05615
     -0.01835 |       0.00000 |       0.00289 |       0.01123 |       1.06115
     -0.01901 |       0.00000 |       0.00289 |       0.01091 |       1.05741
Evaluating losses...
     -0.02055 |       0.00000 |       0.00280 |       0.01174 |      

     -0.01456 |       0.00000 |       0.00076 |       0.00854 |       1.05401
     -0.01479 |       0.00000 |       0.00077 |       0.00841 |       1.05416
Evaluating losses...
     -0.01707 |       0.00000 |       0.00070 |       0.00827 |       1.05413
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.72        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1907         |
| TimeElapsed     | 4.65e+03     |
| TimestepsSoFar  | 3674112      |
| ev_tdlam_before | 0.116        |
| loss_ent        | 1.0541254    |
| loss_kl         | 0.008271385  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.017070454 |
| loss_vf_loss    | 0.0007038272 |
----------------------------------
********** Iteration 897 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00025 |       0.00000 |       0.00645 |       0.00384 |       1.07898
     -0.00625 |       0.00000 |       0.00495 |

********** Iteration 902 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00012 |       0.00000 |       0.00190 |       0.00245 |       1.13448
     -0.00630 |       0.00000 |       0.00132 |       0.00351 |       1.13274
     -0.00847 |       0.00000 |       0.00112 |       0.00393 |       1.13436
     -0.00945 |       0.00000 |       0.00102 |       0.00443 |       1.13408
     -0.01038 |       0.00000 |       0.00095 |       0.00523 |       1.13571
     -0.01192 |       0.00000 |       0.00092 |       0.00574 |       1.13600
     -0.01335 |       0.00000 |       0.00091 |       0.00676 |       1.13496
     -0.01314 |       0.00000 |       0.00091 |       0.00643 |       1.13636
     -0.01403 |       0.00000 |       0.00083 |       0.00702 |       1.13550
     -0.01451 |       0.00000 |       0.00083 |       0.00770 |       1.13694
Evaluating losses...
     -0.01597 |       0.00000 |       0.00082 |       0.00779 |      

     -0.01771 |       0.00000 |      7.35e-05 |       0.01017 |       1.11015
     -0.01810 |       0.00000 |      7.13e-05 |       0.01019 |       1.10958
Evaluating losses...
     -0.02010 |       0.00000 |      6.86e-05 |       0.01019 |       1.10809
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.7         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 1922         |
| TimeElapsed     | 4.68e+03     |
| TimestepsSoFar  | 3719168      |
| ev_tdlam_before | -2.94        |
| loss_ent        | 1.1080933    |
| loss_kl         | 0.010186907  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020098375 |
| loss_vf_loss    | 6.862106e-05 |
----------------------------------
********** Iteration 908 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00262 |       0.00000 |       0.00400 |       0.00098 |       1.04258
     -0.00429 |       0.00000 |       0.00246 |

********** Iteration 913 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00126 |       0.00000 |       0.00023 |       0.00265 |       1.09402
     -0.00600 |       0.00000 |       0.00015 |       0.00561 |       1.09351
     -0.00801 |       0.00000 |       0.00013 |       0.00651 |       1.09430
     -0.01197 |       0.00000 |       0.00012 |       0.00633 |       1.09565
     -0.01278 |       0.00000 |       0.00012 |       0.00750 |       1.09192
     -0.01391 |       0.00000 |       0.00011 |       0.00843 |       1.09088
     -0.01515 |       0.00000 |       0.00011 |       0.00891 |       1.09181
     -0.01629 |       0.00000 |       0.00010 |       0.01043 |       1.08678
     -0.01760 |       0.00000 |       0.00010 |       0.01185 |       1.08952
     -0.01710 |       0.00000 |      9.94e-05 |       0.01221 |       1.08576
Evaluating losses...
     -0.01969 |       0.00000 |      9.55e-05 |       0.01239 |      

     -0.01174 |       0.00000 |       0.00088 |       0.00724 |       1.09536
     -0.01276 |       0.00000 |       0.00085 |       0.00780 |       1.09493
     -0.01371 |       0.00000 |       0.00081 |       0.00817 |       1.09611
Evaluating losses...
     -0.01503 |       0.00000 |       0.00078 |       0.00852 |       1.09290
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.67         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1936          |
| TimeElapsed     | 4.86e+03      |
| TimestepsSoFar  | 3764224       |
| ev_tdlam_before | 0.586         |
| loss_ent        | 1.0929027     |
| loss_kl         | 0.008518713   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.015027523  |
| loss_vf_loss    | 0.00078495644 |
-----------------------------------
********** Iteration 919 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00553 |       0.00000 |  

********** Iteration 924 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00209 |       0.00000 |       0.00059 |       0.00295 |       1.09022
     -0.00312 |       0.00000 |       0.00034 |       0.00335 |       1.09106
     -0.00650 |       0.00000 |       0.00029 |       0.00641 |       1.09185
     -0.00648 |       0.00000 |       0.00029 |       0.00724 |       1.09257
     -0.00784 |       0.00000 |       0.00027 |       0.00658 |       1.09853
     -0.00955 |       0.00000 |       0.00026 |       0.00709 |       1.09893
     -0.00948 |       0.00000 |       0.00027 |       0.00895 |       1.09730
     -0.01021 |       0.00000 |       0.00026 |       0.00942 |       1.10101
     -0.01174 |       0.00000 |       0.00025 |       0.00913 |       1.09919
     -0.01028 |       0.00000 |       0.00025 |       0.01144 |       1.10015
Evaluating losses...
     -0.01263 |       0.00000 |       0.00025 |       0.01044 |      

     -0.01831 |       0.00000 |       0.00140 |       0.01013 |       1.07522
     -0.01896 |       0.00000 |       0.00138 |       0.01047 |       1.07888
     -0.01926 |       0.00000 |       0.00136 |       0.01111 |       1.07217
Evaluating losses...
     -0.02029 |       0.00000 |       0.00129 |       0.01211 |       1.07963
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.66        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1951         |
| TimeElapsed     | 4.9e+03      |
| TimestepsSoFar  | 3809280      |
| ev_tdlam_before | 0.435        |
| loss_ent        | 1.0796275    |
| loss_kl         | 0.012109271  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.02029189  |
| loss_vf_loss    | 0.0012878226 |
----------------------------------
********** Iteration 930 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00197 |       0.00000 |       0.00168 |

********** Iteration 935 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00107 |       0.00000 |       0.00044 |       0.00216 |       1.09014
     -0.00644 |       0.00000 |       0.00025 |       0.00329 |       1.08932
     -0.01000 |       0.00000 |       0.00018 |       0.00476 |       1.09279
     -0.01110 |       0.00000 |       0.00016 |       0.00575 |       1.08873
     -0.01262 |       0.00000 |       0.00014 |       0.00600 |       1.08645
     -0.01325 |       0.00000 |       0.00013 |       0.00625 |       1.08607
     -0.01445 |       0.00000 |       0.00012 |       0.00650 |       1.08712
     -0.01512 |       0.00000 |       0.00011 |       0.00635 |       1.08201
     -0.01582 |       0.00000 |       0.00011 |       0.00737 |       1.08522
     -0.01609 |       0.00000 |       0.00010 |       0.00695 |       1.08342
Evaluating losses...
     -0.01780 |       0.00000 |       0.00010 |       0.00702 |      

     -0.01552 |       0.00000 |       0.00117 |       0.00728 |       1.06749
     -0.01571 |       0.00000 |       0.00115 |       0.00707 |       1.06694
     -0.01702 |       0.00000 |       0.00113 |       0.00838 |       1.06562
Evaluating losses...
     -0.01817 |       0.00000 |       0.00109 |       0.00919 |       1.06537
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.67        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1966         |
| TimeElapsed     | 4.94e+03     |
| TimestepsSoFar  | 3854336      |
| ev_tdlam_before | 0.766        |
| loss_ent        | 1.0653701    |
| loss_kl         | 0.009190661  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018170547 |
| loss_vf_loss    | 0.0010892244 |
----------------------------------
********** Iteration 941 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00057 |       0.00000 |       0.00310 |

********** Iteration 946 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00128 |       0.00000 |       0.00068 |       0.00237 |       1.08200
     -0.00666 |       0.00000 |       0.00047 |       0.00540 |       1.07602
     -0.01013 |       0.00000 |       0.00036 |       0.00453 |       1.07916
     -0.01207 |       0.00000 |       0.00030 |       0.00580 |       1.07671
     -0.01444 |       0.00000 |       0.00026 |       0.00693 |       1.07586
     -0.01613 |       0.00000 |       0.00023 |       0.00671 |       1.07824
     -0.01661 |       0.00000 |       0.00021 |       0.00741 |       1.07964
     -0.01768 |       0.00000 |       0.00019 |       0.00851 |       1.07695
     -0.01882 |       0.00000 |       0.00018 |       0.00944 |       1.07933
     -0.02002 |       0.00000 |       0.00017 |       0.00968 |       1.07686
Evaluating losses...
     -0.02143 |       0.00000 |       0.00016 |       0.01173 |      

     -0.01361 |       0.00000 |       0.00049 |       0.00602 |       1.09872
     -0.01398 |       0.00000 |       0.00047 |       0.00707 |       1.09718
     -0.01421 |       0.00000 |       0.00049 |       0.00732 |       1.09534
Evaluating losses...
     -0.01602 |       0.00000 |       0.00041 |       0.00745 |       1.10051
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.56         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 1981          |
| TimeElapsed     | 4.97e+03      |
| TimestepsSoFar  | 3899392       |
| ev_tdlam_before | 0.544         |
| loss_ent        | 1.1005138     |
| loss_kl         | 0.007450193   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.016022496  |
| loss_vf_loss    | 0.00041455822 |
-----------------------------------
********** Iteration 952 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00128 |       0.00000 |  

********** Iteration 957 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00267 |       0.00000 |       0.00529 |       0.00256 |       1.07657
     -0.00430 |       0.00000 |       0.00379 |       0.00360 |       1.06922
     -0.00747 |       0.00000 |       0.00357 |       0.00387 |       1.06669
     -0.00943 |       0.00000 |       0.00335 |       0.00433 |       1.06902
     -0.01129 |       0.00000 |       0.00325 |       0.00551 |       1.07003
     -0.01184 |       0.00000 |       0.00326 |       0.00583 |       1.07009
     -0.01391 |       0.00000 |       0.00306 |       0.00628 |       1.07228
     -0.01399 |       0.00000 |       0.00308 |       0.00727 |       1.07167
     -0.01489 |       0.00000 |       0.00301 |       0.00851 |       1.07323
     -0.01584 |       0.00000 |       0.00291 |       0.00892 |       1.07168
Evaluating losses...
     -0.01806 |       0.00000 |       0.00279 |       0.00944 |      

     -0.01874 |       0.00000 |       0.00268 |       0.00977 |       1.08314
     -0.01862 |       0.00000 |       0.00265 |       0.01113 |       1.08020
Evaluating losses...
     -0.02063 |       0.00000 |       0.00246 |       0.01148 |       1.07780
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.51        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 1996         |
| TimeElapsed     | 5.01e+03     |
| TimestepsSoFar  | 3944448      |
| ev_tdlam_before | 0.623        |
| loss_ent        | 1.0778018    |
| loss_kl         | 0.011484292  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020634595 |
| loss_vf_loss    | 0.0024643985 |
----------------------------------
********** Iteration 963 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00028 |       0.00000 |       0.00034 |       0.00234 |       1.07305
     -0.00786 |       0.00000 |       0.00020 |

********** Iteration 968 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00107 |       0.00000 |       0.00852 |       0.00259 |       1.06349
     -0.00785 |       0.00000 |       0.00690 |       0.00746 |       1.07083
     -0.01086 |       0.00000 |       0.00650 |       0.00708 |       1.06717
     -0.01328 |       0.00000 |       0.00625 |       0.00816 |       1.06880
     -0.01461 |       0.00000 |       0.00589 |       0.00949 |       1.06457
     -0.01689 |       0.00000 |       0.00577 |       0.01110 |       1.06554
     -0.01773 |       0.00000 |       0.00566 |       0.01321 |       1.06888
     -0.01925 |       0.00000 |       0.00546 |       0.01282 |       1.07125
     -0.01818 |       0.00000 |       0.00542 |       0.01460 |       1.06731
     -0.01951 |       0.00000 |       0.00553 |       0.01377 |       1.06831
Evaluating losses...
     -0.02185 |       0.00000 |       0.00522 |       0.01288 |      

     -0.01683 |       0.00000 |       0.00125 |       0.00778 |       1.05323
     -0.01768 |       0.00000 |       0.00119 |       0.00810 |       1.05497
Evaluating losses...
     -0.01898 |       0.00000 |       0.00113 |       0.00788 |       1.05563
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.51        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2011         |
| TimeElapsed     | 5.04e+03     |
| TimestepsSoFar  | 3989504      |
| ev_tdlam_before | 0.431        |
| loss_ent        | 1.0556308    |
| loss_kl         | 0.007879522  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018981852 |
| loss_vf_loss    | 0.0011313418 |
----------------------------------
********** Iteration 974 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00128 |       0.00000 |       0.00019 |       0.00316 |       1.09671
     -0.00643 |       0.00000 |       0.00015 |

********** Iteration 979 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00011 |       0.00000 |       0.00637 |       0.00283 |       1.03256
     -0.00844 |       0.00000 |       0.00547 |       0.00477 |       1.02224
     -0.01168 |       0.00000 |       0.00513 |       0.00469 |       1.02680
     -0.01363 |       0.00000 |       0.00497 |       0.00563 |       1.03197
     -0.01444 |       0.00000 |       0.00485 |       0.00594 |       1.03158
     -0.01567 |       0.00000 |       0.00486 |       0.00687 |       1.02843
     -0.01700 |       0.00000 |       0.00477 |       0.00692 |       1.03520
     -0.01731 |       0.00000 |       0.00462 |       0.00799 |       1.03122
     -0.01855 |       0.00000 |       0.00459 |       0.00817 |       1.03105
     -0.01870 |       0.00000 |       0.00457 |       0.00878 |       1.03376
Evaluating losses...
     -0.01997 |       0.00000 |       0.00453 |       0.00872 |      

     -0.01542 |       0.00000 |       0.00293 |       0.00897 |       1.03607
     -0.01582 |       0.00000 |       0.00286 |       0.00945 |       1.03623
     -0.01679 |       0.00000 |       0.00279 |       0.00986 |       1.04055
Evaluating losses...
     -0.01850 |       0.00000 |       0.00269 |       0.01017 |       1.04009
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.53        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2026         |
| TimeElapsed     | 5.23e+03     |
| TimestepsSoFar  | 4034560      |
| ev_tdlam_before | 0.718        |
| loss_ent        | 1.0400912    |
| loss_kl         | 0.010174311  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018498184 |
| loss_vf_loss    | 0.0026902186 |
----------------------------------
********** Iteration 985 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00152 |       0.00000 |       0.00509 |

********** Iteration 990 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00163 |       0.00000 |      2.90e-05 |       0.00288 |       1.10680
     -0.00449 |       0.00000 |      2.58e-05 |       0.00372 |       1.11191
     -0.00961 |       0.00000 |      2.26e-05 |       0.00424 |       1.10687
     -0.01135 |       0.00000 |      2.15e-05 |       0.00533 |       1.10932
     -0.01450 |       0.00000 |      1.98e-05 |       0.00577 |       1.11104
     -0.01624 |       0.00000 |      1.82e-05 |       0.00655 |       1.10977
     -0.01747 |       0.00000 |      1.79e-05 |       0.00713 |       1.10969
     -0.01871 |       0.00000 |      1.74e-05 |       0.00778 |       1.11044
     -0.02004 |       0.00000 |      1.63e-05 |       0.00856 |       1.11415
     -0.02096 |       0.00000 |      1.57e-05 |       0.00899 |       1.11019
Evaluating losses...
     -0.02275 |       0.00000 |      1.45e-05 |       0.01011 |      

     -0.01652 |       0.00000 |       0.00316 |       0.01106 |       1.10600
     -0.01680 |       0.00000 |       0.00307 |       0.01175 |       1.10054
     -0.01740 |       0.00000 |       0.00296 |       0.01077 |       1.10684
Evaluating losses...
     -0.01855 |       0.00000 |       0.00282 |       0.01073 |       1.10660
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.44        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2041         |
| TimeElapsed     | 5.27e+03     |
| TimestepsSoFar  | 4079616      |
| ev_tdlam_before | 0.474        |
| loss_ent        | 1.1065955    |
| loss_kl         | 0.010729275  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01855357  |
| loss_vf_loss    | 0.0028237463 |
----------------------------------
********** Iteration 996 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00272 |       0.00000 |       0.00172 |

********** Iteration 1001 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00195 |       0.00000 |       0.00475 |       0.00283 |       1.08996
     -0.00587 |       0.00000 |       0.00333 |       0.00453 |       1.09007
     -0.01026 |       0.00000 |       0.00291 |       0.00452 |       1.08930
     -0.01191 |       0.00000 |       0.00265 |       0.00622 |       1.08964
     -0.01268 |       0.00000 |       0.00246 |       0.00617 |       1.09062
     -0.01446 |       0.00000 |       0.00237 |       0.00632 |       1.08833
     -0.01542 |       0.00000 |       0.00223 |       0.00706 |       1.08702
     -0.01663 |       0.00000 |       0.00216 |       0.00725 |       1.08902
     -0.01649 |       0.00000 |       0.00211 |       0.00719 |       1.08822
     -0.01737 |       0.00000 |       0.00201 |       0.00757 |       1.08818
Evaluating losses...
     -0.01804 |       0.00000 |       0.00191 |       0.00759 |     

     -0.02100 |       0.00000 |      2.02e-05 |       0.01013 |       1.14250
     -0.02119 |       0.00000 |      2.01e-05 |       0.01028 |       1.14091
Evaluating losses...
     -0.02451 |       0.00000 |      1.87e-05 |       0.01145 |       1.14159
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.53        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2056         |
| TimeElapsed     | 5.3e+03      |
| TimestepsSoFar  | 4124672      |
| ev_tdlam_before | -1.33        |
| loss_ent        | 1.14159      |
| loss_kl         | 0.011446994  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.024509527 |
| loss_vf_loss    | 1.872742e-05 |
----------------------------------
********** Iteration 1007 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -8.61e-05 |       0.00000 |       0.00100 |       0.00105 |       1.10991
     -0.00372 |       0.00000 |       0.00065 

********** Iteration 1012 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00086 |       0.00000 |       0.00217 |       0.00242 |       1.04216
     -0.00675 |       0.00000 |       0.00209 |       0.00454 |       1.03742
     -0.00727 |       0.00000 |       0.00208 |       0.00608 |       1.03507
     -0.00908 |       0.00000 |       0.00202 |       0.00545 |       1.04193
     -0.01030 |       0.00000 |       0.00210 |       0.00496 |       1.04015
     -0.01100 |       0.00000 |       0.00200 |       0.00696 |       1.04380
     -0.01094 |       0.00000 |       0.00197 |       0.00735 |       1.04473
     -0.01197 |       0.00000 |       0.00187 |       0.00801 |       1.04610
     -0.01272 |       0.00000 |       0.00173 |       0.00959 |       1.04615
     -0.01240 |       0.00000 |       0.00167 |       0.01101 |       1.04976
Evaluating losses...
     -0.01340 |       0.00000 |       0.00158 |       0.01216 |     

     -0.01611 |       0.00000 |       0.00370 |       0.00719 |       1.04388
     -0.01590 |       0.00000 |       0.00358 |       0.00863 |       1.04155
Evaluating losses...
     -0.01748 |       0.00000 |       0.00338 |       0.00832 |       1.04383
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.65        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2071         |
| TimeElapsed     | 5.34e+03     |
| TimestepsSoFar  | 4169728      |
| ev_tdlam_before | 0.686        |
| loss_ent        | 1.0438302    |
| loss_kl         | 0.008320631  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01747771  |
| loss_vf_loss    | 0.0033837135 |
----------------------------------
********** Iteration 1018 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00103 |       0.00000 |       0.00216 |       0.00481 |       1.13848
     -0.00807 |       0.00000 |       0.00188 

********** Iteration 1023 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00086 |       0.00000 |       0.00022 |       0.00440 |       1.08939
     -0.00487 |       0.00000 |       0.00012 |       0.00414 |       1.08582
     -0.00646 |       0.00000 |      9.48e-05 |       0.00365 |       1.09224
     -0.00811 |       0.00000 |      8.33e-05 |       0.00506 |       1.09605
     -0.00924 |       0.00000 |      7.70e-05 |       0.00484 |       1.09785
     -0.01101 |       0.00000 |      7.12e-05 |       0.00577 |       1.09818
     -0.01157 |       0.00000 |      6.87e-05 |       0.00704 |       1.09771
     -0.01199 |       0.00000 |      6.55e-05 |       0.00716 |       1.09942
     -0.01310 |       0.00000 |      6.33e-05 |       0.00772 |       1.09908
     -0.01403 |       0.00000 |      6.26e-05 |       0.00860 |       1.10028
Evaluating losses...
     -0.01572 |       0.00000 |      5.98e-05 |       0.00809 |     

     -0.01163 |       0.00000 |       0.00159 |       0.00841 |       1.11690
     -0.01198 |       0.00000 |       0.00154 |       0.00768 |       1.11682
     -0.01282 |       0.00000 |       0.00154 |       0.00843 |       1.11460
Evaluating losses...
     -0.01419 |       0.00000 |       0.00147 |       0.00830 |       1.11591
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.66        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2086         |
| TimeElapsed     | 5.37e+03     |
| TimestepsSoFar  | 4214784      |
| ev_tdlam_before | 0.646        |
| loss_ent        | 1.1159097    |
| loss_kl         | 0.008303508  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014185565 |
| loss_vf_loss    | 0.0014723248 |
----------------------------------
********** Iteration 1029 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00313 |       0.00000 |       0.00162 

********** Iteration 1034 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00141 |       0.00000 |       0.00060 |       0.00235 |       1.11081
     -0.00538 |       0.00000 |       0.00037 |       0.00502 |       1.11503
     -0.01098 |       0.00000 |       0.00030 |       0.00545 |       1.11196
     -0.01304 |       0.00000 |       0.00027 |       0.00741 |       1.11209
     -0.01492 |       0.00000 |       0.00024 |       0.00770 |       1.11396
     -0.01652 |       0.00000 |       0.00022 |       0.00790 |       1.11326
     -0.01782 |       0.00000 |       0.00021 |       0.00995 |       1.11498
     -0.01893 |       0.00000 |       0.00021 |       0.00964 |       1.11457
     -0.02058 |       0.00000 |       0.00019 |       0.01002 |       1.11560
     -0.01963 |       0.00000 |       0.00018 |       0.01185 |       1.11632
Evaluating losses...
     -0.02291 |       0.00000 |       0.00019 |       0.01082 |     

     -0.01372 |       0.00000 |      6.96e-05 |       0.00694 |       1.08873
     -0.01512 |       0.00000 |      6.77e-05 |       0.00707 |       1.08692
     -0.01531 |       0.00000 |      6.48e-05 |       0.00774 |       1.08726
     -0.01591 |       0.00000 |      6.17e-05 |       0.00822 |       1.08820
Evaluating losses...
     -0.01908 |       0.00000 |      5.98e-05 |       0.00781 |       1.08944
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.67        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2101         |
| TimeElapsed     | 5.56e+03     |
| TimestepsSoFar  | 4259840      |
| ev_tdlam_before | -4.52        |
| loss_ent        | 1.0894417    |
| loss_kl         | 0.0078107123 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01908323  |
| loss_vf_loss    | 5.982037e-05 |
----------------------------------
********** Iteration 1040 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 1045 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00190 |       0.00000 |       0.00431 |       0.00261 |       1.00762
     -0.00503 |       0.00000 |       0.00312 |       0.00349 |       1.01073
     -0.00831 |       0.00000 |       0.00284 |       0.00498 |       1.01264
     -0.00970 |       0.00000 |       0.00266 |       0.00593 |       1.01265
     -0.01129 |       0.00000 |       0.00256 |       0.00570 |       1.01725
     -0.01141 |       0.00000 |       0.00244 |       0.00565 |       1.01432
     -0.01276 |       0.00000 |       0.00247 |       0.00709 |       1.01815
     -0.01390 |       0.00000 |       0.00237 |       0.00685 |       1.01384
     -0.01465 |       0.00000 |       0.00232 |       0.00717 |       1.01548
     -0.01579 |       0.00000 |       0.00229 |       0.00732 |       1.01564
Evaluating losses...
     -0.01680 |       0.00000 |       0.00220 |       0.00721 |     

     -0.01701 |       0.00000 |       0.00143 |       0.00748 |       1.04450
     -0.01783 |       0.00000 |       0.00143 |       0.00749 |       1.04992
     -0.01851 |       0.00000 |       0.00139 |       0.00818 |       1.04867
Evaluating losses...
     -0.01970 |       0.00000 |       0.00135 |       0.00853 |       1.04972
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.56        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2116         |
| TimeElapsed     | 5.6e+03      |
| TimestepsSoFar  | 4304896      |
| ev_tdlam_before | 0.672        |
| loss_ent        | 1.0497208    |
| loss_kl         | 0.008529981  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019695438 |
| loss_vf_loss    | 0.0013470525 |
----------------------------------
********** Iteration 1051 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00201 |       0.00000 |       0.00669 

********** Iteration 1056 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00042 |       0.00000 |       0.00607 |       0.00323 |       1.06697
     -0.00732 |       0.00000 |       0.00298 |       0.00692 |       1.05802
     -0.01028 |       0.00000 |       0.00257 |       0.00961 |       1.05498
     -0.01201 |       0.00000 |       0.00235 |       0.00871 |       1.05988
     -0.01336 |       0.00000 |       0.00223 |       0.01079 |       1.05893
     -0.01469 |       0.00000 |       0.00220 |       0.01010 |       1.05992
     -0.01432 |       0.00000 |       0.00208 |       0.01048 |       1.06192
     -0.01533 |       0.00000 |       0.00201 |       0.01208 |       1.06042
     -0.01601 |       0.00000 |       0.00197 |       0.01170 |       1.06352
     -0.01668 |       0.00000 |       0.00192 |       0.01164 |       1.06395
Evaluating losses...
     -0.01774 |       0.00000 |       0.00182 |       0.01203 |     

     -0.02021 |       0.00000 |      5.33e-05 |       0.00806 |       1.10116
     -0.02032 |       0.00000 |      4.92e-05 |       0.00910 |       1.09997
     -0.02132 |       0.00000 |      4.61e-05 |       0.00906 |       1.09900
Evaluating losses...
     -0.02285 |       0.00000 |      4.69e-05 |       0.00942 |       1.09670
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.55         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2131          |
| TimeElapsed     | 5.64e+03      |
| TimestepsSoFar  | 4349952       |
| ev_tdlam_before | -0.778        |
| loss_ent        | 1.0966951     |
| loss_kl         | 0.00942334    |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.022847272  |
| loss_vf_loss    | 4.6855403e-05 |
-----------------------------------
********** Iteration 1062 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00017 |       0.00000 | 

********** Iteration 1067 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00191 |       0.00000 |       0.00038 |       0.00311 |       1.07135
     -0.00878 |       0.00000 |       0.00025 |       0.00434 |       1.07454
     -0.01281 |       0.00000 |       0.00020 |       0.00521 |       1.07078
     -0.01450 |       0.00000 |       0.00017 |       0.00611 |       1.07426
     -0.01613 |       0.00000 |       0.00015 |       0.00665 |       1.07389
     -0.01701 |       0.00000 |       0.00014 |       0.00631 |       1.07274
     -0.01816 |       0.00000 |       0.00013 |       0.00765 |       1.07249
     -0.01908 |       0.00000 |       0.00012 |       0.00809 |       1.07261
     -0.01998 |       0.00000 |       0.00012 |       0.00808 |       1.07312
     -0.02051 |       0.00000 |       0.00011 |       0.00890 |       1.07433
Evaluating losses...
     -0.02249 |       0.00000 |       0.00011 |       0.00910 |     

     -0.01033 |       0.00000 |       0.00110 |       0.00537 |       1.03807
     -0.01012 |       0.00000 |       0.00106 |       0.00586 |       1.03670
     -0.01159 |       0.00000 |       0.00101 |       0.00635 |       1.03930
Evaluating losses...
     -0.01204 |       0.00000 |       0.00100 |       0.00656 |       1.03997
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.48        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2146         |
| TimeElapsed     | 5.67e+03     |
| TimestepsSoFar  | 4395008      |
| ev_tdlam_before | 0.222        |
| loss_ent        | 1.0399693    |
| loss_kl         | 0.0065646768 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012039816 |
| loss_vf_loss    | 0.0009954796 |
----------------------------------
********** Iteration 1073 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00229 |       0.00000 |       0.00148 

********** Iteration 1078 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00075 |       0.00000 |       0.00469 |       0.00625 |       1.03773
     -0.00831 |       0.00000 |       0.00342 |       0.00620 |       1.03690
     -0.01155 |       0.00000 |       0.00293 |       0.00681 |       1.03432
     -0.01405 |       0.00000 |       0.00249 |       0.00589 |       1.03871
     -0.01469 |       0.00000 |       0.00202 |       0.00624 |       1.03689
     -0.01595 |       0.00000 |       0.00186 |       0.00699 |       1.03815
     -0.01608 |       0.00000 |       0.00168 |       0.00761 |       1.03604
     -0.01725 |       0.00000 |       0.00155 |       0.00805 |       1.03659
     -0.01814 |       0.00000 |       0.00146 |       0.00780 |       1.04115
     -0.01865 |       0.00000 |       0.00141 |       0.00852 |       1.04180
Evaluating losses...
     -0.01973 |       0.00000 |       0.00131 |       0.00778 |     

     -0.02123 |       0.00000 |      6.08e-05 |       0.00975 |       1.07423
     -0.02272 |       0.00000 |      5.76e-05 |       0.00949 |       1.07152
Evaluating losses...
     -0.02504 |       0.00000 |      5.57e-05 |       0.01050 |       1.07484
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.43        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2161         |
| TimeElapsed     | 5.71e+03     |
| TimestepsSoFar  | 4440064      |
| ev_tdlam_before | -1.6         |
| loss_ent        | 1.0748386    |
| loss_kl         | 0.0104976995 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.025038378 |
| loss_vf_loss    | 5.570831e-05 |
----------------------------------
********** Iteration 1084 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00083 |       0.00000 |       0.00014 |       0.00133 |       1.03497
     -0.00376 |       0.00000 |      8.14e-05 

********** Iteration 1089 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00092 |       0.00000 |       0.00358 |       0.00409 |       1.03124
     -0.00604 |       0.00000 |       0.00301 |       0.00536 |       1.03465
     -0.00962 |       0.00000 |       0.00283 |       0.00551 |       1.03237
     -0.01140 |       0.00000 |       0.00266 |       0.00533 |       1.03219
     -0.01282 |       0.00000 |       0.00250 |       0.00587 |       1.03214
     -0.01374 |       0.00000 |       0.00244 |       0.00566 |       1.03334
     -0.01452 |       0.00000 |       0.00236 |       0.00624 |       1.03218
     -0.01491 |       0.00000 |       0.00234 |       0.00742 |       1.03418
     -0.01561 |       0.00000 |       0.00227 |       0.00717 |       1.03161
     -0.01631 |       0.00000 |       0.00230 |       0.00782 |       1.03325
Evaluating losses...
     -0.01719 |       0.00000 |       0.00229 |       0.00813 |     

     -0.01236 |       0.00000 |       0.00013 |       0.00842 |       1.11659
     -0.01305 |       0.00000 |       0.00013 |       0.00915 |       1.11626
     -0.01343 |       0.00000 |       0.00013 |       0.01017 |       1.11264
Evaluating losses...
     -0.01406 |       0.00000 |       0.00012 |       0.01053 |       1.11262
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.31         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 2176          |
| TimeElapsed     | 5.74e+03      |
| TimestepsSoFar  | 4485120       |
| ev_tdlam_before | -1.15         |
| loss_ent        | 1.1126242     |
| loss_kl         | 0.010526204   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.014061719  |
| loss_vf_loss    | 0.00011786072 |
-----------------------------------
********** Iteration 1095 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00136 |       0.00000 | 

********** Iteration 1100 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00086 |       0.00000 |      8.87e-05 |       0.00488 |       1.04593
     -0.00790 |       0.00000 |      6.07e-05 |       0.00660 |       1.04228
     -0.01318 |       0.00000 |      4.93e-05 |       0.00670 |       1.04468
     -0.01512 |       0.00000 |      4.38e-05 |       0.00703 |       1.04280
     -0.01795 |       0.00000 |      4.13e-05 |       0.00704 |       1.04360
     -0.01976 |       0.00000 |      3.74e-05 |       0.00835 |       1.04163
     -0.02057 |       0.00000 |      3.56e-05 |       0.00911 |       1.04451
     -0.02182 |       0.00000 |      3.42e-05 |       0.00994 |       1.04538
     -0.02269 |       0.00000 |      3.18e-05 |       0.01047 |       1.04273
     -0.02408 |       0.00000 |      3.01e-05 |       0.01053 |       1.04471
Evaluating losses...
     -0.02583 |       0.00000 |      2.91e-05 |       0.01024 |     

     -0.00744 |       0.00000 |      9.24e-05 |       0.00656 |       1.04553
     -0.00702 |       0.00000 |      8.32e-05 |       0.00695 |       1.04526
     -0.00750 |       0.00000 |      7.79e-05 |       0.00679 |       1.04665
Evaluating losses...
     -0.00877 |       0.00000 |      7.33e-05 |       0.00696 |       1.04364
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.26         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2191          |
| TimeElapsed     | 5.93e+03      |
| TimestepsSoFar  | 4530176       |
| ev_tdlam_before | -2.51         |
| loss_ent        | 1.0436416     |
| loss_kl         | 0.0069554737  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0087728975 |
| loss_vf_loss    | 7.327284e-05  |
-----------------------------------
********** Iteration 1106 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00179 |       0.00000 | 

********** Iteration 1111 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00269 |       0.00000 |       0.00017 |       0.00482 |       1.06395
     -0.00422 |       0.00000 |      7.77e-05 |       0.00339 |       1.06162
     -0.00868 |       0.00000 |      5.91e-05 |       0.00402 |       1.05698
     -0.00866 |       0.00000 |      4.80e-05 |       0.00628 |       1.06142
     -0.01267 |       0.00000 |      4.33e-05 |       0.00552 |       1.06036
     -0.01415 |       0.00000 |      3.96e-05 |       0.00600 |       1.06276
     -0.01477 |       0.00000 |      3.72e-05 |       0.00687 |       1.06067
     -0.01600 |       0.00000 |      3.52e-05 |       0.00729 |       1.06127
     -0.01705 |       0.00000 |      3.41e-05 |       0.00854 |       1.06237
     -0.01817 |       0.00000 |      3.22e-05 |       0.00907 |       1.06132
Evaluating losses...
     -0.02005 |       0.00000 |      3.09e-05 |       0.00936 |     

     -0.02061 |       0.00000 |      8.33e-05 |       0.00721 |       1.08696
     -0.02050 |       0.00000 |      8.10e-05 |       0.00807 |       1.08496
Evaluating losses...
     -0.02236 |       0.00000 |      7.60e-05 |       0.00851 |       1.08589
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.27        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2206         |
| TimeElapsed     | 5.97e+03     |
| TimestepsSoFar  | 4575232      |
| ev_tdlam_before | -0.12        |
| loss_ent        | 1.085886     |
| loss_kl         | 0.008509976  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022364855 |
| loss_vf_loss    | 7.595273e-05 |
----------------------------------
********** Iteration 1117 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00246 |       0.00000 |       0.00107 |       0.00201 |       1.10314
     -0.00479 |       0.00000 |       0.00049 

********** Iteration 1122 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00295 |       0.00000 |       0.00340 |       0.00544 |       1.03926
     -0.00476 |       0.00000 |       0.00177 |       0.00578 |       1.04484
     -0.00561 |       0.00000 |       0.00137 |       0.00481 |       1.05062
     -0.00782 |       0.00000 |       0.00127 |       0.00654 |       1.04703
     -0.00863 |       0.00000 |       0.00120 |       0.00645 |       1.04904
     -0.01035 |       0.00000 |       0.00117 |       0.00641 |       1.05154
     -0.01058 |       0.00000 |       0.00116 |       0.00672 |       1.05212
     -0.01088 |       0.00000 |       0.00115 |       0.00795 |       1.05448
     -0.01225 |       0.00000 |       0.00112 |       0.00807 |       1.05332
     -0.01266 |       0.00000 |       0.00111 |       0.00824 |       1.05409
Evaluating losses...
     -0.01414 |       0.00000 |       0.00105 |       0.00805 |     

     -0.01309 |       0.00000 |       0.00153 |       0.00733 |       1.10992
     -0.01391 |       0.00000 |       0.00146 |       0.00750 |       1.10904
     -0.01418 |       0.00000 |       0.00148 |       0.00808 |       1.10869
Evaluating losses...
     -0.01519 |       0.00000 |       0.00142 |       0.00833 |       1.10736
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.25        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2221         |
| TimeElapsed     | 6e+03        |
| TimestepsSoFar  | 4620288      |
| ev_tdlam_before | 0.653        |
| loss_ent        | 1.1073607    |
| loss_kl         | 0.008325171  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015188065 |
| loss_vf_loss    | 0.0014222454 |
----------------------------------
********** Iteration 1128 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00220 |       0.00000 |       0.00036 

********** Iteration 1133 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00646 |       0.00000 |       0.00224 |       0.00431 |       1.06849
     -0.00484 |       0.00000 |       0.00187 |       0.00467 |       1.07348
     -0.00897 |       0.00000 |       0.00179 |       0.00577 |       1.07402
     -0.01035 |       0.00000 |       0.00163 |       0.00672 |       1.07398
     -0.01108 |       0.00000 |       0.00151 |       0.00669 |       1.06910
     -0.01180 |       0.00000 |       0.00147 |       0.00797 |       1.07321
     -0.01344 |       0.00000 |       0.00145 |       0.00754 |       1.07291
     -0.01466 |       0.00000 |       0.00137 |       0.00795 |       1.07119
     -0.01515 |       0.00000 |       0.00140 |       0.00773 |       1.07030
     -0.01469 |       0.00000 |       0.00134 |       0.00826 |       1.06843
Evaluating losses...
     -0.01678 |       0.00000 |       0.00128 |       0.00776 |     

     -0.01085 |       0.00000 |       0.00037 |       0.00479 |       1.10434
     -0.01162 |       0.00000 |       0.00037 |       0.00537 |       1.10482
     -0.01187 |       0.00000 |       0.00036 |       0.00542 |       1.10642
Evaluating losses...
     -0.01167 |       0.00000 |       0.00034 |       0.00672 |       1.10613
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.26        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2236         |
| TimeElapsed     | 6.03e+03     |
| TimestepsSoFar  | 4665344      |
| ev_tdlam_before | 0.563        |
| loss_ent        | 1.1061281    |
| loss_kl         | 0.006716314  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.011673491 |
| loss_vf_loss    | 0.0003405958 |
----------------------------------
********** Iteration 1139 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00043 |       0.00000 |      6.98e-05 

********** Iteration 1144 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00082 |       0.00000 |       0.00017 |       0.00185 |       1.08652
     -0.00472 |       0.00000 |      8.19e-05 |       0.00232 |       1.08044
     -0.00749 |       0.00000 |      6.67e-05 |       0.00334 |       1.08539
     -0.00936 |       0.00000 |      6.08e-05 |       0.00455 |       1.08254
     -0.01081 |       0.00000 |      5.53e-05 |       0.00485 |       1.08674
     -0.01184 |       0.00000 |      5.09e-05 |       0.00552 |       1.08719
     -0.01250 |       0.00000 |      4.87e-05 |       0.00634 |       1.08606
     -0.01338 |       0.00000 |      4.46e-05 |       0.00663 |       1.08560
     -0.01425 |       0.00000 |      4.25e-05 |       0.00714 |       1.08847
     -0.01445 |       0.00000 |      4.05e-05 |       0.00756 |       1.08897
Evaluating losses...
     -0.01605 |       0.00000 |      3.80e-05 |       0.00853 |     

     -0.01672 |       0.00000 |       0.00204 |       0.01100 |       1.05561
     -0.01711 |       0.00000 |       0.00205 |       0.01212 |       1.05590
Evaluating losses...
     -0.01820 |       0.00000 |       0.00198 |       0.01175 |       1.05525
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.38        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2251         |
| TimeElapsed     | 6.07e+03     |
| TimestepsSoFar  | 4710400      |
| ev_tdlam_before | 0.525        |
| loss_ent        | 1.0552485    |
| loss_kl         | 0.01174919   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018201416 |
| loss_vf_loss    | 0.0019823438 |
----------------------------------
********** Iteration 1150 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00101 |       0.00000 |       0.00032 |       0.00247 |       1.06288
     -0.00431 |       0.00000 |       0.00020 

********** Iteration 1155 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00234 |       0.00000 |       0.00130 |       0.00185 |       1.05685
     -0.00427 |       0.00000 |       0.00122 |       0.00782 |       1.04806
     -0.00841 |       0.00000 |       0.00112 |       0.00566 |       1.05399
     -0.00929 |       0.00000 |       0.00110 |       0.00611 |       1.05275
     -0.00967 |       0.00000 |       0.00106 |       0.00661 |       1.05640
     -0.01121 |       0.00000 |       0.00103 |       0.00798 |       1.05310
     -0.01184 |       0.00000 |       0.00103 |       0.00759 |       1.05371
     -0.01266 |       0.00000 |       0.00100 |       0.00715 |       1.05366
     -0.01401 |       0.00000 |       0.00100 |       0.00743 |       1.05686
     -0.01392 |       0.00000 |       0.00097 |       0.00810 |       1.05607
Evaluating losses...
     -0.01490 |       0.00000 |       0.00093 |       0.00779 |     

     -0.00872 |       0.00000 |       0.00047 |       0.00409 |       1.07611
     -0.00930 |       0.00000 |       0.00047 |       0.00424 |       1.07453
     -0.00977 |       0.00000 |       0.00044 |       0.00446 |       1.07492
     -0.01029 |       0.00000 |       0.00046 |       0.00468 |       1.07535
Evaluating losses...
     -0.01141 |       0.00000 |       0.00042 |       0.00537 |       1.07422
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.35        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2265         |
| TimeElapsed     | 6.25e+03     |
| TimestepsSoFar  | 4755456      |
| ev_tdlam_before | 0.478        |
| loss_ent        | 1.0742234    |
| loss_kl         | 0.005365199  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.011406988 |
| loss_vf_loss    | 0.0004219661 |
----------------------------------
********** Iteration 1161 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 1166 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00078 |       0.00000 |      5.80e-05 |       0.00346 |       1.09453
     -0.00666 |       0.00000 |      4.50e-05 |       0.00429 |       1.09813
     -0.01091 |       0.00000 |      3.75e-05 |       0.00545 |       1.09693
     -0.01262 |       0.00000 |      3.25e-05 |       0.00594 |       1.09902
     -0.01452 |       0.00000 |      2.97e-05 |       0.00592 |       1.09523
     -0.01639 |       0.00000 |      2.70e-05 |       0.00629 |       1.09688
     -0.01732 |       0.00000 |      2.56e-05 |       0.00654 |       1.09587
     -0.01879 |       0.00000 |      2.45e-05 |       0.00734 |       1.09464
     -0.01913 |       0.00000 |      2.34e-05 |       0.00813 |       1.09577
     -0.02045 |       0.00000 |      2.32e-05 |       0.00804 |       1.09377
Evaluating losses...
     -0.02258 |       0.00000 |      2.48e-05 |       0.00802 |     

     -0.01497 |       0.00000 |       0.00133 |       0.00777 |       1.09386
     -0.01559 |       0.00000 |       0.00127 |       0.00831 |       1.09436
Evaluating losses...
     -0.01639 |       0.00000 |       0.00120 |       0.00777 |       1.09232
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.34        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2280         |
| TimeElapsed     | 6.28e+03     |
| TimestepsSoFar  | 4800512      |
| ev_tdlam_before | 0.138        |
| loss_ent        | 1.0923169    |
| loss_kl         | 0.007772066  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016388489 |
| loss_vf_loss    | 0.0011983938 |
----------------------------------
********** Iteration 1172 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00047 |       0.00000 |       0.00393 |       0.00245 |       1.06994
     -0.00617 |       0.00000 |       0.00300 

********** Iteration 1177 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00232 |       0.00000 |       0.00802 |       0.00339 |       1.10021
     -0.00700 |       0.00000 |       0.00649 |       0.00364 |       1.09981
     -0.01252 |       0.00000 |       0.00589 |       0.00457 |       1.10004
     -0.01396 |       0.00000 |       0.00560 |       0.00542 |       1.10390
     -0.01584 |       0.00000 |       0.00541 |       0.00603 |       1.10346
     -0.01762 |       0.00000 |       0.00524 |       0.00619 |       1.10700
     -0.01826 |       0.00000 |       0.00511 |       0.00701 |       1.10732
     -0.01903 |       0.00000 |       0.00494 |       0.00746 |       1.10618
     -0.02046 |       0.00000 |       0.00481 |       0.00810 |       1.10744
     -0.02097 |       0.00000 |       0.00469 |       0.00840 |       1.10638
Evaluating losses...
     -0.02299 |       0.00000 |       0.00469 |       0.00867 |     

     -0.01419 |       0.00000 |       0.00105 |       0.00613 |       1.06639
     -0.01465 |       0.00000 |       0.00100 |       0.00664 |       1.06506
Evaluating losses...
     -0.01606 |       0.00000 |       0.00094 |       0.00704 |       1.06302
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.34        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2295         |
| TimeElapsed     | 6.31e+03     |
| TimestepsSoFar  | 4845568      |
| ev_tdlam_before | 0.432        |
| loss_ent        | 1.0630188    |
| loss_kl         | 0.0070353765 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016064087 |
| loss_vf_loss    | 0.0009394735 |
----------------------------------
********** Iteration 1183 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00049 |       0.00000 |       0.00205 |       0.00324 |       1.09604
     -0.00385 |       0.00000 |       0.00102 

********** Iteration 1188 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00051 |       0.00000 |       0.00103 |       0.00276 |       1.10857
     -0.00502 |       0.00000 |       0.00087 |       0.00483 |       1.10920
     -0.00609 |       0.00000 |       0.00087 |       0.00542 |       1.11136
     -0.00748 |       0.00000 |       0.00086 |       0.00537 |       1.11291
     -0.00673 |       0.00000 |       0.00086 |       0.01915 |       1.10823
     -0.00674 |       0.00000 |       0.00084 |       0.02297 |       1.10081
     -0.00878 |       0.00000 |       0.00085 |       0.01007 |       1.10809
     -0.00960 |       0.00000 |       0.00083 |       0.00760 |       1.10988
     -0.00986 |       0.00000 |       0.00083 |       0.00731 |       1.11216
     -0.00992 |       0.00000 |       0.00084 |       0.00664 |       1.11297
Evaluating losses...
     -0.01145 |       0.00000 |       0.00081 |       0.00710 |     

     -0.01786 |       0.00000 |       0.00016 |       0.00796 |       1.08148
     -0.01897 |       0.00000 |       0.00015 |       0.00806 |       1.08157
     -0.02011 |       0.00000 |       0.00014 |       0.00920 |       1.08104
Evaluating losses...
     -0.02155 |       0.00000 |       0.00014 |       0.00914 |       1.08006
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.33         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 2310          |
| TimeElapsed     | 6.35e+03      |
| TimestepsSoFar  | 4890624       |
| ev_tdlam_before | -1.73         |
| loss_ent        | 1.0800557     |
| loss_kl         | 0.009137416   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.021545578  |
| loss_vf_loss    | 0.00014214627 |
-----------------------------------
********** Iteration 1194 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00210 |       0.00000 | 

********** Iteration 1199 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00072 |       0.00000 |       0.00399 |       0.00238 |       1.08399
     -0.00508 |       0.00000 |       0.00290 |       0.00326 |       1.08128
     -0.00814 |       0.00000 |       0.00262 |       0.00375 |       1.08314
     -0.01037 |       0.00000 |       0.00245 |       0.00360 |       1.08459
     -0.01056 |       0.00000 |       0.00231 |       0.00509 |       1.08576
     -0.01180 |       0.00000 |       0.00219 |       0.00581 |       1.08473
     -0.01299 |       0.00000 |       0.00213 |       0.00645 |       1.08939
     -0.01381 |       0.00000 |       0.00211 |       0.00612 |       1.08862
     -0.01513 |       0.00000 |       0.00202 |       0.00687 |       1.08877
     -0.01452 |       0.00000 |       0.00194 |       0.00705 |       1.08805
Evaluating losses...
     -0.01631 |       0.00000 |       0.00197 |       0.00726 |     

     -0.01954 |       0.00000 |      6.71e-05 |       0.00849 |       1.03110
     -0.02061 |       0.00000 |      6.66e-05 |       0.00866 |       1.03171
Evaluating losses...
     -0.02243 |       0.00000 |      6.41e-05 |       0.00896 |       1.02938
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.27        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2326         |
| TimeElapsed     | 6.38e+03     |
| TimestepsSoFar  | 4935680      |
| ev_tdlam_before | -1.82        |
| loss_ent        | 1.0293791    |
| loss_kl         | 0.0089619765 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022428475 |
| loss_vf_loss    | 6.41148e-05  |
----------------------------------
********** Iteration 1205 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00069 |       0.00000 |       0.00449 |       0.00379 |       1.04194
     -0.00529 |       0.00000 |       0.00366 

********** Iteration 1210 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00098 |       0.00000 |       0.00018 |       0.00282 |       1.03355
     -0.00569 |       0.00000 |       0.00012 |       0.00581 |       1.03388
     -0.00927 |       0.00000 |      9.90e-05 |       0.00601 |       1.02858
     -0.01209 |       0.00000 |      8.54e-05 |       0.00584 |       1.03184
     -0.01331 |       0.00000 |      7.48e-05 |       0.00660 |       1.03533
     -0.01476 |       0.00000 |      6.74e-05 |       0.00745 |       1.03197
     -0.01620 |       0.00000 |      6.00e-05 |       0.00767 |       1.03245
     -0.01670 |       0.00000 |      5.69e-05 |       0.00874 |       1.03338
     -0.01796 |       0.00000 |      5.20e-05 |       0.00901 |       1.03489
     -0.01835 |       0.00000 |      5.03e-05 |       0.00979 |       1.03301
Evaluating losses...
     -0.02070 |       0.00000 |      4.66e-05 |       0.00975 |     

     -0.01323 |       0.00000 |       0.00157 |       0.00795 |       1.03078
     -0.01501 |       0.00000 |       0.00151 |       0.00765 |       1.03132
     -0.01435 |       0.00000 |       0.00152 |       0.00850 |       1.03337
Evaluating losses...
     -0.01617 |       0.00000 |       0.00143 |       0.00887 |       1.03104
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.24        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2341         |
| TimeElapsed     | 6.42e+03     |
| TimestepsSoFar  | 4980736      |
| ev_tdlam_before | 0.585        |
| loss_ent        | 1.0310415    |
| loss_kl         | 0.00886735   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016165512 |
| loss_vf_loss    | 0.0014326837 |
----------------------------------
********** Iteration 1216 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00058 |       0.00000 |       0.00420 

********** Iteration 1221 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00089 |       0.00000 |       0.00301 |       0.00322 |       1.06954
     -0.00825 |       0.00000 |       0.00233 |       0.00452 |       1.07300
     -0.01119 |       0.00000 |       0.00218 |       0.00535 |       1.07459
     -0.01174 |       0.00000 |       0.00207 |       0.00537 |       1.07109
     -0.01367 |       0.00000 |       0.00193 |       0.00502 |       1.07523
     -0.01475 |       0.00000 |       0.00186 |       0.00529 |       1.07339
     -0.01551 |       0.00000 |       0.00175 |       0.00532 |       1.07425
     -0.01621 |       0.00000 |       0.00159 |       0.00564 |       1.07633
     -0.01694 |       0.00000 |       0.00152 |       0.00576 |       1.07602
     -0.01697 |       0.00000 |       0.00148 |       0.00677 |       1.07380
Evaluating losses...
     -0.01575 |       0.00000 |       0.00136 |       0.01216 |     

     -0.01601 |       0.00000 |       0.00272 |       0.00864 |       1.04317
     -0.01689 |       0.00000 |       0.00265 |       0.00906 |       1.04391
Evaluating losses...
     -0.01750 |       0.00000 |       0.00255 |       0.00824 |       1.04571
---------------------------------
| EpLenMean       | 3.02e+03    |
| EpRewMean       | -0.16       |
| EpThisIter      | 1           |
| EpisodesSoFar   | 2355        |
| TimeElapsed     | 6.64e+03    |
| TimestepsSoFar  | 5025792     |
| ev_tdlam_before | -0.0101     |
| loss_ent        | 1.0457113   |
| loss_kl         | 0.008242219 |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.01750497 |
| loss_vf_loss    | 0.002547722 |
---------------------------------
********** Iteration 1227 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00168 |       0.00000 |       0.00385 |       0.00461 |       1.02725
     -0.00984 |       0.00000 |       0.00314 |       0.0056

********** Iteration 1232 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00382 |       0.00000 |       0.00326 |       0.00436 |       1.03702
     -0.00672 |       0.00000 |       0.00293 |       0.00701 |       1.03347
     -0.00960 |       0.00000 |       0.00275 |       0.00697 |       1.04088
     -0.01196 |       0.00000 |       0.00268 |       0.00631 |       1.04280
     -0.01333 |       0.00000 |       0.00260 |       0.00682 |       1.04192
     -0.01348 |       0.00000 |       0.00252 |       0.00820 |       1.04271
     -0.01466 |       0.00000 |       0.00248 |       0.00831 |       1.04083
     -0.01554 |       0.00000 |       0.00243 |       0.00784 |       1.04430
     -0.01600 |       0.00000 |       0.00240 |       0.00859 |       1.04257
     -0.01618 |       0.00000 |       0.00243 |       0.00904 |       1.04488
Evaluating losses...
     -0.01762 |       0.00000 |       0.00239 |       0.00930 |     

     -0.01722 |       0.00000 |       0.00385 |       0.00682 |       1.06104
     -0.01748 |       0.00000 |       0.00383 |       0.00759 |       1.06468
Evaluating losses...
     -0.01876 |       0.00000 |       0.00365 |       0.00783 |       1.06451
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.26        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2370         |
| TimeElapsed     | 6.67e+03     |
| TimestepsSoFar  | 5070848      |
| ev_tdlam_before | 0.32         |
| loss_ent        | 1.0645102    |
| loss_kl         | 0.007826527  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018763456 |
| loss_vf_loss    | 0.0036455358 |
----------------------------------
********** Iteration 1238 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00017 |       0.00000 |       0.00137 |       0.00138 |       1.05765
     -0.00358 |       0.00000 |       0.00069 

********** Iteration 1243 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00383 |       0.00000 |       0.00223 |       0.00413 |       1.09355
     -0.00393 |       0.00000 |       0.00195 |       0.00434 |       1.09024
     -0.00661 |       0.00000 |       0.00169 |       0.00380 |       1.08976
     -0.00740 |       0.00000 |       0.00157 |       0.00574 |       1.09261
     -0.00923 |       0.00000 |       0.00145 |       0.00557 |       1.08767
     -0.01141 |       0.00000 |       0.00134 |       0.00630 |       1.08721
     -0.01064 |       0.00000 |       0.00128 |       0.00680 |       1.08925
     -0.01178 |       0.00000 |       0.00126 |       0.00723 |       1.09268
     -0.01165 |       0.00000 |       0.00121 |       0.00782 |       1.09334
     -0.01420 |       0.00000 |       0.00116 |       0.00767 |       1.09315
Evaluating losses...
     -0.01436 |       0.00000 |       0.00110 |       0.00809 |     

     -0.01768 |       0.00000 |       0.00354 |       0.01099 |       1.04711
     -0.01902 |       0.00000 |       0.00339 |       0.01386 |       1.04812
Evaluating losses...
     -0.02091 |       0.00000 |       0.00320 |       0.01203 |       1.04797
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.29        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2385         |
| TimeElapsed     | 6.71e+03     |
| TimestepsSoFar  | 5115904      |
| ev_tdlam_before | 0.576        |
| loss_ent        | 1.0479726    |
| loss_kl         | 0.012027216  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.020914912 |
| loss_vf_loss    | 0.0032044668 |
----------------------------------
********** Iteration 1249 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00333 |       0.00000 |       0.00217 |       0.00633 |       1.07367
     -0.00685 |       0.00000 |       0.00172 

********** Iteration 1254 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00142 |       0.00000 |       0.00278 |       0.00343 |       1.01816
     -0.00808 |       0.00000 |       0.00215 |       0.00370 |       1.02161
     -0.00937 |       0.00000 |       0.00186 |       0.00524 |       1.02054
     -0.01073 |       0.00000 |       0.00176 |       0.00520 |       1.02100
     -0.01225 |       0.00000 |       0.00168 |       0.00595 |       1.01953
     -0.01278 |       0.00000 |       0.00164 |       0.00671 |       1.01979
     -0.01373 |       0.00000 |       0.00157 |       0.00806 |       1.01795
     -0.01458 |       0.00000 |       0.00155 |       0.00908 |       1.01663
     -0.01529 |       0.00000 |       0.00150 |       0.00896 |       1.01770
     -0.01556 |       0.00000 |       0.00148 |       0.00979 |       1.01813
Evaluating losses...
     -0.01686 |       0.00000 |       0.00143 |       0.01035 |     

     -0.01399 |       0.00000 |       0.00251 |       0.00940 |       1.04697
     -0.01469 |       0.00000 |       0.00249 |       0.00854 |       1.04500
Evaluating losses...
     -0.01664 |       0.00000 |       0.00249 |       0.00844 |       1.04557
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.33        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2400         |
| TimeElapsed     | 6.76e+03     |
| TimestepsSoFar  | 5160960      |
| ev_tdlam_before | 0.569        |
| loss_ent        | 1.0455682    |
| loss_kl         | 0.008439433  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01664027  |
| loss_vf_loss    | 0.0024937093 |
----------------------------------
********** Iteration 1260 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00892 |       0.00000 |       0.00242 |       0.00420 |       1.03197
     -0.00323 |       0.00000 |       0.00221 

********** Iteration 1265 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00078 |       0.00000 |       0.00012 |       0.00188 |       1.00115
     -0.00579 |       0.00000 |      6.89e-05 |       0.00516 |       0.99545
     -0.00759 |       0.00000 |      5.49e-05 |       0.00470 |       0.99365
     -0.00881 |       0.00000 |      4.74e-05 |       0.00588 |       0.99325
     -0.00995 |       0.00000 |      4.23e-05 |       0.00542 |       0.99319
     -0.01051 |       0.00000 |      3.86e-05 |       0.00644 |       0.99224
     -0.01105 |       0.00000 |      3.55e-05 |       0.00700 |       0.99219
     -0.01143 |       0.00000 |      3.44e-05 |       0.00703 |       0.99009
     -0.01239 |       0.00000 |      3.21e-05 |       0.00816 |       0.99037
     -0.01281 |       0.00000 |      2.94e-05 |       0.00842 |       0.98828
Evaluating losses...
     -0.01387 |       0.00000 |      2.72e-05 |       0.00831 |     

     -0.01378 |       0.00000 |       0.00180 |       0.00832 |       1.02446
     -0.01392 |       0.00000 |       0.00175 |       0.00722 |       1.02416
Evaluating losses...
     -0.01585 |       0.00000 |       0.00171 |       0.00730 |       1.02347
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.39        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2415         |
| TimeElapsed     | 6.81e+03     |
| TimestepsSoFar  | 5206016      |
| ev_tdlam_before | 0.738        |
| loss_ent        | 1.0234704    |
| loss_kl         | 0.0073015145 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015849615 |
| loss_vf_loss    | 0.0017079923 |
----------------------------------
********** Iteration 1271 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00052 |       0.00000 |       0.00179 |       0.00151 |       1.01209
     -0.00534 |       0.00000 |       0.00166 

********** Iteration 1276 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00589 |       0.00000 |       0.00646 |       0.00711 |       1.00666
     -0.00654 |       0.00000 |       0.00592 |       0.00788 |       1.00781
     -0.00930 |       0.00000 |       0.00576 |       0.00665 |       1.00466
     -0.01220 |       0.00000 |       0.00558 |       0.00770 |       1.00796
     -0.01260 |       0.00000 |       0.00543 |       0.00903 |       1.00605
     -0.01435 |       0.00000 |       0.00540 |       0.00916 |       1.00835
     -0.01599 |       0.00000 |       0.00534 |       0.00918 |       1.01110
     -0.01681 |       0.00000 |       0.00537 |       0.00936 |       1.00980
     -0.01647 |       0.00000 |       0.00519 |       0.00895 |       1.00680
     -0.01735 |       0.00000 |       0.00514 |       0.01036 |       1.01107
Evaluating losses...
     -0.01885 |       0.00000 |       0.00493 |       0.01007 |     

     -0.01248 |       0.00000 |       0.00171 |       0.00541 |       1.02395
     -0.01309 |       0.00000 |       0.00174 |       0.00609 |       1.02489
     -0.01419 |       0.00000 |       0.00167 |       0.00642 |       1.02301
     -0.01462 |       0.00000 |       0.00171 |       0.00735 |       1.02173
     -0.01550 |       0.00000 |       0.00159 |       0.00712 |       1.02337
Evaluating losses...
     -0.01697 |       0.00000 |       0.00150 |       0.00750 |       1.02215
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.5         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2430         |
| TimeElapsed     | 7.07e+03     |
| TimestepsSoFar  | 5251072      |
| ev_tdlam_before | 0.384        |
| loss_ent        | 1.0221493    |
| loss_kl         | 0.007504449  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01697081  |
| loss_vf_loss    | 0.0015018701 |
----------------------------------
********** Iteration 

********** Iteration 1287 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00053 |       0.00000 |       0.00655 |       0.00199 |       1.01367
     -0.00618 |       0.00000 |       0.00524 |       0.00348 |       1.01063
     -0.00840 |       0.00000 |       0.00488 |       0.00472 |       1.00937
     -0.01041 |       0.00000 |       0.00462 |       0.00465 |       1.00995
     -0.01139 |       0.00000 |       0.00436 |       0.00554 |       1.00847
     -0.01250 |       0.00000 |       0.00429 |       0.00476 |       1.01125
     -0.01306 |       0.00000 |       0.00414 |       0.00598 |       1.01319
     -0.01351 |       0.00000 |       0.00395 |       0.00635 |       1.01181
     -0.01472 |       0.00000 |       0.00384 |       0.00626 |       1.01379
     -0.01500 |       0.00000 |       0.00381 |       0.00650 |       1.01127
Evaluating losses...
     -0.01594 |       0.00000 |       0.00359 |       0.00741 |     

     -0.01509 |       0.00000 |       0.00269 |       0.00860 |       1.03621
     -0.01612 |       0.00000 |       0.00265 |       0.00909 |       1.03674
Evaluating losses...
     -0.01668 |       0.00000 |       0.00250 |       0.00886 |       1.03619
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.53        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2445         |
| TimeElapsed     | 7.11e+03     |
| TimestepsSoFar  | 5296128      |
| ev_tdlam_before | 0.39         |
| loss_ent        | 1.0361911    |
| loss_kl         | 0.008859506  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016681388 |
| loss_vf_loss    | 0.002495192  |
----------------------------------
********** Iteration 1293 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00034 |       0.00000 |       0.00067 |       0.00237 |       1.03947
     -0.00633 |       0.00000 |       0.00030 

********** Iteration 1298 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00204 |       0.00000 |       0.00378 |       0.00988 |       1.06241
     -0.00634 |       0.00000 |       0.00299 |       0.00875 |       1.06243
     -0.00739 |       0.00000 |       0.00253 |       0.00932 |       1.06131
     -0.00921 |       0.00000 |       0.00238 |       0.01165 |       1.06110
     -0.00969 |       0.00000 |       0.00231 |       0.01579 |       1.05935
     -0.01087 |       0.00000 |       0.00221 |       0.01762 |       1.05934
     -0.01182 |       0.00000 |       0.00210 |       0.02780 |       1.05686
     -0.01218 |       0.00000 |       0.00206 |       0.02760 |       1.05915
     -0.01278 |       0.00000 |       0.00197 |       0.02616 |       1.06119
     -0.01261 |       0.00000 |       0.00187 |       0.02114 |       1.05980
Evaluating losses...
     -0.01394 |       0.00000 |       0.00179 |       0.02358 |     

     -0.01771 |       0.00000 |      1.33e-05 |       0.00647 |       1.04032
     -0.01907 |       0.00000 |      1.29e-05 |       0.00680 |       1.04203
     -0.01921 |       0.00000 |      1.26e-05 |       0.00741 |       1.04048
Evaluating losses...
     -0.02125 |       0.00000 |      1.18e-05 |       0.00810 |       1.04012
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.55         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 2460          |
| TimeElapsed     | 7.15e+03      |
| TimestepsSoFar  | 5341184       |
| ev_tdlam_before | -3.89         |
| loss_ent        | 1.0401205     |
| loss_kl         | 0.00810145    |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.021254158  |
| loss_vf_loss    | 1.1807994e-05 |
-----------------------------------
********** Iteration 1304 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00135 |       0.00000 | 

********** Iteration 1309 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00055 |       0.00000 |       0.00133 |       0.00158 |       1.00244
     -0.00844 |       0.00000 |       0.00074 |       0.00321 |       0.99880
     -0.01106 |       0.00000 |       0.00066 |       0.00420 |       0.99846
     -0.01370 |       0.00000 |       0.00062 |       0.00456 |       0.99825
     -0.01456 |       0.00000 |       0.00058 |       0.00536 |       0.99515
     -0.01542 |       0.00000 |       0.00056 |       0.00571 |       0.99705
     -0.01672 |       0.00000 |       0.00054 |       0.00553 |       0.99825
     -0.01748 |       0.00000 |       0.00052 |       0.00629 |       0.99801
     -0.01745 |       0.00000 |       0.00050 |       0.00704 |       0.99702
     -0.01840 |       0.00000 |       0.00049 |       0.00789 |       0.99464
Evaluating losses...
     -0.02027 |       0.00000 |       0.00049 |       0.00773 |     

     -0.01702 |       0.00000 |       0.00178 |       0.00744 |       0.97403
     -0.01747 |       0.00000 |       0.00171 |       0.00779 |       0.97330
     -0.01870 |       0.00000 |       0.00170 |       0.00821 |       0.97332
Evaluating losses...
     -0.01935 |       0.00000 |       0.00163 |       0.00826 |       0.97076
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.49        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2475         |
| TimeElapsed     | 7.18e+03     |
| TimestepsSoFar  | 5386240      |
| ev_tdlam_before | 0.472        |
| loss_ent        | 0.9707621    |
| loss_kl         | 0.00825632   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019354692 |
| loss_vf_loss    | 0.0016327933 |
----------------------------------
********** Iteration 1315 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00016 |       0.00000 |       0.00160 

********** Iteration 1320 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.07e-05 |       0.00000 |       0.00749 |       0.00186 |       0.98532
     -0.00713 |       0.00000 |       0.00570 |       0.00388 |       0.98338
     -0.00900 |       0.00000 |       0.00502 |       0.00514 |       0.98200
     -0.01111 |       0.00000 |       0.00471 |       0.00484 |       0.98256
     -0.01246 |       0.00000 |       0.00459 |       0.00484 |       0.98092
     -0.01387 |       0.00000 |       0.00435 |       0.00549 |       0.97907
     -0.01464 |       0.00000 |       0.00422 |       0.00586 |       0.98004
     -0.01556 |       0.00000 |       0.00403 |       0.00587 |       0.97905
     -0.01526 |       0.00000 |       0.00390 |       0.00679 |       0.97967
     -0.01594 |       0.00000 |       0.00385 |       0.00651 |       0.97978
Evaluating losses...
     -0.01741 |       0.00000 |       0.00363 |       0.00610 |     

     -0.01700 |       0.00000 |       0.00314 |       0.00842 |       0.98786
     -0.01751 |       0.00000 |       0.00319 |       0.00818 |       0.98723
Evaluating losses...
     -0.01921 |       0.00000 |       0.00295 |       0.00796 |       0.98761
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.51        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2490         |
| TimeElapsed     | 7.22e+03     |
| TimestepsSoFar  | 5431296      |
| ev_tdlam_before | 0.541        |
| loss_ent        | 0.98760575   |
| loss_kl         | 0.007959123  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019213554 |
| loss_vf_loss    | 0.002948555  |
----------------------------------
********** Iteration 1326 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00377 |       0.00000 |       0.00728 |       0.00259 |       0.96961
     -0.00548 |       0.00000 |       0.00622 

********** Iteration 1331 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00131 |       0.00000 |       0.00580 |       0.00741 |       1.00144
     -0.00615 |       0.00000 |       0.00453 |       0.00693 |       0.99823
     -0.00850 |       0.00000 |       0.00428 |       0.00561 |       1.00260
     -0.00987 |       0.00000 |       0.00408 |       0.00490 |       1.00251
     -0.01130 |       0.00000 |       0.00398 |       0.00564 |       1.00486
     -0.01142 |       0.00000 |       0.00385 |       0.00573 |       1.00792
     -0.01261 |       0.00000 |       0.00384 |       0.00645 |       1.00876
     -0.01324 |       0.00000 |       0.00372 |       0.00629 |       1.00990
     -0.01383 |       0.00000 |       0.00374 |       0.00796 |       1.01168
     -0.01467 |       0.00000 |       0.00366 |       0.00738 |       1.01187
Evaluating losses...
     -0.01562 |       0.00000 |       0.00353 |       0.00800 |     

     -0.01302 |       0.00000 |       0.00115 |       0.01382 |       0.99851
     -0.01452 |       0.00000 |       0.00111 |       0.01132 |       0.99953
Evaluating losses...
     -0.01447 |       0.00000 |       0.00107 |       0.01123 |       1.00101
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.5         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2505         |
| TimeElapsed     | 7.26e+03     |
| TimestepsSoFar  | 5476352      |
| ev_tdlam_before | 0.603        |
| loss_ent        | 1.0010147    |
| loss_kl         | 0.011234173  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014469652 |
| loss_vf_loss    | 0.0010672456 |
----------------------------------
********** Iteration 1337 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00225 |       0.00000 |       0.00726 |       0.00384 |       0.98032
     -0.00719 |       0.00000 |       0.00655 

********** Iteration 1342 ************
Eval num_timesteps=5496832, episode_reward=-0.21 +/- 0.96
Episode length: 3000.00 +/- 0.00
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00436 |       0.00000 |       0.00442 |       0.00429 |       1.01509
     -0.00528 |       0.00000 |       0.00359 |       0.00535 |       1.01561
     -0.00839 |       0.00000 |       0.00331 |       0.00466 |       1.01344
     -0.01039 |       0.00000 |       0.00313 |       0.00511 |       1.01587
     -0.01168 |       0.00000 |       0.00293 |       0.00529 |       1.01582
     -0.01282 |       0.00000 |       0.00291 |       0.00503 |       1.01612
     -0.01376 |       0.00000 |       0.00279 |       0.00568 |       1.01789
     -0.01464 |       0.00000 |       0.00272 |       0.00598 |       1.02045
     -0.01461 |       0.00000 |       0.00267 |       0.00658 |       1.01883
     -0.01550 |       0.00000 |       0.00261 |       0.00740 |       1.0175

     -0.01157 |       0.00000 |       0.00122 |       0.00525 |       1.05674
     -0.01211 |       0.00000 |       0.00116 |       0.00585 |       1.05752
     -0.01273 |       0.00000 |       0.00108 |       0.00583 |       1.05637
     -0.01344 |       0.00000 |       0.00105 |       0.00600 |       1.05682
Evaluating losses...
     -0.01388 |       0.00000 |       0.00097 |       0.00615 |       1.05474
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.51         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2520          |
| TimeElapsed     | 7.44e+03      |
| TimestepsSoFar  | 5521408       |
| ev_tdlam_before | 0.601         |
| loss_ent        | 1.0547422     |
| loss_kl         | 0.0061461106  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.01387634   |
| loss_vf_loss    | 0.00096645555 |
-----------------------------------
********** Iteration 1348 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 1353 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -3.21e-05 |       0.00000 |       0.00297 |       0.00596 |       1.02655
     -0.00479 |       0.00000 |       0.00197 |       0.00508 |       1.02608
     -0.00634 |       0.00000 |       0.00174 |       0.00729 |       1.02514
     -0.00735 |       0.00000 |       0.00160 |       0.00758 |       1.02587
     -0.00827 |       0.00000 |       0.00145 |       0.00620 |       1.02615
     -0.00838 |       0.00000 |       0.00147 |       0.00720 |       1.02580
     -0.00924 |       0.00000 |       0.00140 |       0.00816 |       1.02581
     -0.00960 |       0.00000 |       0.00133 |       0.00732 |       1.02573
     -0.01035 |       0.00000 |       0.00124 |       0.00769 |       1.02711
     -0.01031 |       0.00000 |       0.00124 |       0.00806 |       1.02755
Evaluating losses...
     -0.01175 |       0.00000 |       0.00115 |       0.00805 |     

     -0.01356 |       0.00000 |       0.00202 |       0.01038 |       1.02451
     -0.01434 |       0.00000 |       0.00195 |       0.01008 |       1.02430
     -0.01433 |       0.00000 |       0.00192 |       0.01025 |       1.02585
Evaluating losses...
     -0.01551 |       0.00000 |       0.00188 |       0.01001 |       1.02357
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.51        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2535         |
| TimeElapsed     | 7.47e+03     |
| TimestepsSoFar  | 5566464      |
| ev_tdlam_before | 0.491        |
| loss_ent        | 1.0235661    |
| loss_kl         | 0.0100090485 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015510839 |
| loss_vf_loss    | 0.001884138  |
----------------------------------
********** Iteration 1359 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00127 |       0.00000 |       0.00339 

********** Iteration 1364 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00014 |       0.00000 |       0.00045 |       0.00204 |       1.00659
     -0.00669 |       0.00000 |       0.00026 |       0.00342 |       1.01253
     -0.00970 |       0.00000 |       0.00022 |       0.00389 |       1.01120
     -0.01210 |       0.00000 |       0.00020 |       0.00396 |       1.01277
     -0.01374 |       0.00000 |       0.00019 |       0.00466 |       1.01244
     -0.01461 |       0.00000 |       0.00018 |       0.00543 |       1.01280
     -0.01559 |       0.00000 |       0.00018 |       0.00578 |       1.01307
     -0.01622 |       0.00000 |       0.00017 |       0.00597 |       1.01379
     -0.01691 |       0.00000 |       0.00016 |       0.00619 |       1.01413
     -0.01715 |       0.00000 |       0.00016 |       0.00689 |       1.01294
Evaluating losses...
     -0.01851 |       0.00000 |       0.00015 |       0.00698 |     

     -0.01385 |       0.00000 |       0.00139 |       0.00667 |       0.99975
     -0.01342 |       0.00000 |       0.00135 |       0.00657 |       0.99866
     -0.01426 |       0.00000 |       0.00127 |       0.00649 |       0.99931
Evaluating losses...
     -0.01508 |       0.00000 |       0.00119 |       0.00645 |       0.99882
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.47         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2550          |
| TimeElapsed     | 7.51e+03      |
| TimestepsSoFar  | 5611520       |
| ev_tdlam_before | 0.475         |
| loss_ent        | 0.9988189     |
| loss_kl         | 0.00645343    |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0150821805 |
| loss_vf_loss    | 0.0011864986  |
-----------------------------------
********** Iteration 1370 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00043 |       0.00000 | 

********** Iteration 1375 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.50e-05 |       0.00000 |       0.00049 |       0.00248 |       0.99588
     -0.00660 |       0.00000 |       0.00019 |       0.00397 |       0.99547
     -0.00890 |       0.00000 |       0.00016 |       0.00371 |       0.99330
     -0.01002 |       0.00000 |       0.00013 |       0.00445 |       0.99232
     -0.01103 |       0.00000 |       0.00013 |       0.00538 |       0.99529
     -0.01229 |       0.00000 |       0.00012 |       0.00484 |       0.99571
     -0.01309 |       0.00000 |       0.00011 |       0.00567 |       0.99391
     -0.01389 |       0.00000 |       0.00011 |       0.00524 |       0.99465
     -0.01422 |       0.00000 |       0.00010 |       0.00594 |       0.99571
     -0.01436 |       0.00000 |       0.00010 |       0.00728 |       0.99638
Evaluating losses...
     -0.01560 |       0.00000 |      9.12e-05 |       0.00704 |     

     -0.01407 |       0.00000 |       0.00040 |       0.00671 |       0.94791
     -0.01478 |       0.00000 |       0.00039 |       0.00708 |       0.94787
Evaluating losses...
     -0.01617 |       0.00000 |       0.00036 |       0.00690 |       0.94631
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.47         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2565          |
| TimeElapsed     | 7.55e+03      |
| TimestepsSoFar  | 5656576       |
| ev_tdlam_before | 0.629         |
| loss_ent        | 0.94630903    |
| loss_kl         | 0.0068955524  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.016168337  |
| loss_vf_loss    | 0.00036427335 |
-----------------------------------
********** Iteration 1381 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00524 |       0.00000 |       0.00211 |       0.00361 |       0.96953
      0.00098 |       0.00000 | 

********** Iteration 1386 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00070 |       0.00000 |       0.00203 |       0.00467 |       0.98990
     -0.00562 |       0.00000 |       0.00170 |       0.00350 |       0.99110
     -0.00700 |       0.00000 |       0.00159 |       0.00327 |       0.99122
     -0.00862 |       0.00000 |       0.00150 |       0.00360 |       0.99325
     -0.00866 |       0.00000 |       0.00148 |       0.00449 |       0.99630
     -0.01017 |       0.00000 |       0.00139 |       0.00405 |       0.99440
     -0.01093 |       0.00000 |       0.00136 |       0.00504 |       0.99621
     -0.01137 |       0.00000 |       0.00134 |       0.00481 |       0.99654
     -0.01162 |       0.00000 |       0.00132 |       0.00503 |       0.99647
     -0.01169 |       0.00000 |       0.00131 |       0.00543 |       0.99807
Evaluating losses...
     -0.01199 |       0.00000 |       0.00128 |       0.00563 |     

     -0.02019 |       0.00000 |      2.54e-05 |       0.00774 |       0.99430
     -0.02125 |       0.00000 |      2.42e-05 |       0.00845 |       0.99430
     -0.02241 |       0.00000 |      2.35e-05 |       0.00853 |       0.99394
Evaluating losses...
     -0.02428 |       0.00000 |      2.44e-05 |       0.00929 |       0.99122
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.44         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2580          |
| TimeElapsed     | 7.58e+03      |
| TimestepsSoFar  | 5701632       |
| ev_tdlam_before | -0.544        |
| loss_ent        | 0.99121684    |
| loss_kl         | 0.009288993   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.024284326  |
| loss_vf_loss    | 2.4421224e-05 |
-----------------------------------
********** Iteration 1392 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00167 |       0.00000 | 

********** Iteration 1397 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00102 |       0.00000 |       0.00044 |       0.00302 |       0.99431
     -0.00540 |       0.00000 |       0.00022 |       0.00310 |       0.99397
     -0.00763 |       0.00000 |       0.00016 |       0.00415 |       0.99106
     -0.00941 |       0.00000 |       0.00013 |       0.00426 |       0.99283
     -0.01116 |       0.00000 |       0.00011 |       0.00519 |       0.99315
     -0.01187 |       0.00000 |      9.73e-05 |       0.00576 |       0.99278
     -0.01251 |       0.00000 |      8.78e-05 |       0.00684 |       0.99413
     -0.01276 |       0.00000 |      8.16e-05 |       0.00810 |       0.99181
     -0.01375 |       0.00000 |      7.56e-05 |       0.00817 |       0.99469
     -0.01387 |       0.00000 |      7.04e-05 |       0.00897 |       0.99413
Evaluating losses...
     -0.01539 |       0.00000 |      6.61e-05 |       0.00816 |     

     -0.01119 |       0.00000 |       0.00111 |       0.00567 |       1.01534
     -0.01175 |       0.00000 |       0.00110 |       0.00621 |       1.01666
Evaluating losses...
     -0.01202 |       0.00000 |       0.00102 |       0.00644 |       1.01619
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.4         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2595         |
| TimeElapsed     | 7.62e+03     |
| TimestepsSoFar  | 5746688      |
| ev_tdlam_before | 0.259        |
| loss_ent        | 1.0161853    |
| loss_kl         | 0.006443893  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012016152 |
| loss_vf_loss    | 0.0010206116 |
----------------------------------
********** Iteration 1403 ************
Eval num_timesteps=5746688, episode_reward=-0.49 +/- 1.17
Episode length: 3000.00 +/- 0.00
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00031 |       0.00000 |  

********** Iteration 1408 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00094 |       0.00000 |       0.00074 |       0.00228 |       1.00854
     -0.00342 |       0.00000 |       0.00041 |       0.00197 |       1.00945
     -0.00420 |       0.00000 |       0.00037 |       0.00285 |       1.01001
     -0.00469 |       0.00000 |       0.00034 |       0.00402 |       1.01025
     -0.00640 |       0.00000 |       0.00031 |       0.00453 |       1.01131
     -0.00657 |       0.00000 |       0.00029 |       0.00549 |       1.01171
     -0.00784 |       0.00000 |       0.00026 |       0.00604 |       1.01549
     -0.00820 |       0.00000 |       0.00024 |       0.00639 |       1.01559
     -0.00796 |       0.00000 |       0.00023 |       0.00692 |       1.01599
     -0.00961 |       0.00000 |       0.00023 |       0.00715 |       1.01651
Evaluating losses...
     -0.01036 |       0.00000 |       0.00022 |       0.00686 |     

     -0.01354 |       0.00000 |       0.00277 |       0.00656 |       0.99117
     -0.01423 |       0.00000 |       0.00264 |       0.00649 |       0.99172
Evaluating losses...
     -0.01554 |       0.00000 |       0.00250 |       0.00637 |       0.99220
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.46        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2609         |
| TimeElapsed     | 7.79e+03     |
| TimestepsSoFar  | 5791744      |
| ev_tdlam_before | 0.308        |
| loss_ent        | 0.99220306   |
| loss_kl         | 0.006368975  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015544846 |
| loss_vf_loss    | 0.0024978563 |
----------------------------------
********** Iteration 1414 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00411 |       0.00000 |       0.00223 |       0.00130 |       1.00214
     -0.00478 |       0.00000 |       0.00149 

********** Iteration 1419 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00205 |       0.00000 |       0.00185 |       0.00350 |       1.01495
     -0.00488 |       0.00000 |       0.00169 |       0.00445 |       1.01148
     -0.00744 |       0.00000 |       0.00168 |       0.00596 |       1.01282
     -0.00856 |       0.00000 |       0.00161 |       0.01097 |       1.01368
     -0.01028 |       0.00000 |       0.00156 |       0.00807 |       1.01613
     -0.01183 |       0.00000 |       0.00157 |       0.00982 |       1.01719
     -0.01253 |       0.00000 |       0.00158 |       0.00834 |       1.01905
     -0.01293 |       0.00000 |       0.00154 |       0.01016 |       1.02006
     -0.01364 |       0.00000 |       0.00152 |       0.01004 |       1.02144
     -0.01382 |       0.00000 |       0.00150 |       0.01148 |       1.02047
Evaluating losses...
     -0.01429 |       0.00000 |       0.00151 |       0.01018 |     

     -0.01120 |       0.00000 |       0.00140 |       0.00712 |       1.02656
     -0.01148 |       0.00000 |       0.00133 |       0.00853 |       1.02772
Evaluating losses...
     -0.01279 |       0.00000 |       0.00129 |       0.00872 |       1.02836
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.33        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2624         |
| TimeElapsed     | 7.83e+03     |
| TimestepsSoFar  | 5836800      |
| ev_tdlam_before | 0.304        |
| loss_ent        | 1.028364     |
| loss_kl         | 0.008715749  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01278771  |
| loss_vf_loss    | 0.0012902077 |
----------------------------------
********** Iteration 1425 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00306 |       0.00000 |       0.00176 |       0.00163 |       0.99441
     -0.00161 |       0.00000 |       0.00130 

********** Iteration 1430 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00033 |       0.00000 |       0.00039 |       0.00281 |       1.05506
     -0.00553 |       0.00000 |       0.00023 |       0.00611 |       1.05579
     -0.00901 |       0.00000 |       0.00018 |       0.00484 |       1.05336
     -0.01098 |       0.00000 |       0.00015 |       0.00417 |       1.05205
     -0.01174 |       0.00000 |       0.00014 |       0.00511 |       1.05198
     -0.01343 |       0.00000 |       0.00013 |       0.00542 |       1.05374
     -0.01437 |       0.00000 |       0.00012 |       0.00557 |       1.05132
     -0.01473 |       0.00000 |       0.00012 |       0.00582 |       1.05103
     -0.01548 |       0.00000 |       0.00011 |       0.00614 |       1.04981
     -0.01592 |       0.00000 |       0.00011 |       0.00624 |       1.04969
Evaluating losses...
     -0.01707 |       0.00000 |       0.00010 |       0.00613 |     

     -0.01928 |       0.00000 |      1.96e-05 |       0.00580 |       1.02719
     -0.02010 |       0.00000 |      1.80e-05 |       0.00642 |       1.02840
     -0.02082 |       0.00000 |      1.71e-05 |       0.00672 |       1.02881
Evaluating losses...
     -0.02278 |       0.00000 |      1.99e-05 |       0.00676 |       1.03046
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.26        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2639         |
| TimeElapsed     | 7.86e+03     |
| TimestepsSoFar  | 5881856      |
| ev_tdlam_before | -0.585       |
| loss_ent        | 1.0304648    |
| loss_kl         | 0.0067561693 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.022780979 |
| loss_vf_loss    | 1.993887e-05 |
----------------------------------
********** Iteration 1436 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00133 |       0.00000 |       0.00519 

********** Iteration 1441 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00059 |       0.00000 |       0.00597 |       0.00165 |       1.03952
     -0.00756 |       0.00000 |       0.00400 |       0.00400 |       1.03982
     -0.01052 |       0.00000 |       0.00313 |       0.00541 |       1.03720
     -0.01185 |       0.00000 |       0.00279 |       0.00645 |       1.03949
     -0.01281 |       0.00000 |       0.00261 |       0.00733 |       1.03820
     -0.01383 |       0.00000 |       0.00248 |       0.00969 |       1.03729
     -0.01445 |       0.00000 |       0.00234 |       0.01032 |       1.03795
     -0.01525 |       0.00000 |       0.00226 |       0.01086 |       1.03693
     -0.01605 |       0.00000 |       0.00220 |       0.01153 |       1.03747
     -0.01657 |       0.00000 |       0.00215 |       0.01207 |       1.03857
Evaluating losses...
     -0.01730 |       0.00000 |       0.00203 |       0.01192 |     

     -0.01048 |       0.00000 |       0.00080 |       0.00662 |       1.09979
     -0.01070 |       0.00000 |       0.00080 |       0.00672 |       1.09874
     -0.01034 |       0.00000 |       0.00081 |       0.00652 |       1.10041
Evaluating losses...
     -0.01087 |       0.00000 |       0.00076 |       0.00733 |       1.10002
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.27        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2654         |
| TimeElapsed     | 7.89e+03     |
| TimestepsSoFar  | 5926912      |
| ev_tdlam_before | -0.0028      |
| loss_ent        | 1.1000152    |
| loss_kl         | 0.0073328605 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.0108688   |
| loss_vf_loss    | 0.0007565761 |
----------------------------------
********** Iteration 1447 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00111 |       0.00000 |       0.00264 

********** Iteration 1452 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00090 |       0.00000 |       0.00285 |       0.00363 |       0.98515
     -0.00634 |       0.00000 |       0.00216 |       0.00608 |       0.98444
     -0.00904 |       0.00000 |       0.00197 |       0.00718 |       0.98255
     -0.01097 |       0.00000 |       0.00188 |       0.00620 |       0.98270
     -0.01259 |       0.00000 |       0.00179 |       0.00703 |       0.98335
     -0.01278 |       0.00000 |       0.00176 |       0.00703 |       0.98312
     -0.01389 |       0.00000 |       0.00174 |       0.00737 |       0.98376
     -0.01407 |       0.00000 |       0.00170 |       0.00784 |       0.98512
     -0.01504 |       0.00000 |       0.00168 |       0.00792 |       0.98575
     -0.01550 |       0.00000 |       0.00166 |       0.00803 |       0.98642
Evaluating losses...
     -0.01641 |       0.00000 |       0.00161 |       0.00845 |     

     -0.01783 |       0.00000 |       0.00209 |       0.00772 |       0.99617
     -0.01762 |       0.00000 |       0.00208 |       0.00797 |       0.99604
Evaluating losses...
     -0.01943 |       0.00000 |       0.00201 |       0.00800 |       0.99374
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.36        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2669         |
| TimeElapsed     | 7.93e+03     |
| TimestepsSoFar  | 5971968      |
| ev_tdlam_before | 0.621        |
| loss_ent        | 0.9937375    |
| loss_kl         | 0.008000219  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.019428913 |
| loss_vf_loss    | 0.0020096395 |
----------------------------------
********** Iteration 1458 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00079 |       0.00000 |       0.00156 |       0.00149 |       0.94928
     -0.00472 |       0.00000 |       0.00119 

********** Iteration 1463 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00221 |       0.00000 |       0.00273 |       0.00571 |       0.96434
     -0.00479 |       0.00000 |       0.00203 |       0.00403 |       0.96841
     -0.00766 |       0.00000 |       0.00181 |       0.00420 |       0.97022
     -0.00847 |       0.00000 |       0.00162 |       0.00453 |       0.97177
     -0.00989 |       0.00000 |       0.00153 |       0.00477 |       0.97290
     -0.01060 |       0.00000 |       0.00145 |       0.00552 |       0.97243
     -0.01157 |       0.00000 |       0.00138 |       0.00601 |       0.97197
     -0.01230 |       0.00000 |       0.00134 |       0.00608 |       0.97322
     -0.01300 |       0.00000 |       0.00129 |       0.00640 |       0.97218
     -0.01355 |       0.00000 |       0.00124 |       0.00678 |       0.97290
Evaluating losses...
     -0.01454 |       0.00000 |       0.00115 |       0.00739 |     

     -0.01569 |       0.00000 |       0.00050 |       0.00548 |       1.03957
     -0.01644 |       0.00000 |       0.00049 |       0.00631 |       1.03931
     -0.01690 |       0.00000 |       0.00047 |       0.00673 |       1.04004
Evaluating losses...
     -0.01795 |       0.00000 |       0.00045 |       0.00718 |       1.04151
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.44         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 2684          |
| TimeElapsed     | 8.1e+03       |
| TimestepsSoFar  | 6017024       |
| ev_tdlam_before | -0.0594       |
| loss_ent        | 1.0415077     |
| loss_kl         | 0.0071753957  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.017951356  |
| loss_vf_loss    | 0.00045430512 |
-----------------------------------
********** Iteration 1469 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00213 |       0.00000 | 

********** Iteration 1474 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00253 |       0.00000 |       0.00594 |       0.01133 |       1.03373
     -0.00497 |       0.00000 |       0.00457 |       0.00613 |       1.03160
     -0.00889 |       0.00000 |       0.00424 |       0.00698 |       1.03070
     -0.01049 |       0.00000 |       0.00400 |       0.00955 |       1.03408
     -0.01136 |       0.00000 |       0.00401 |       0.00965 |       1.03126
     -0.01322 |       0.00000 |       0.00384 |       0.00986 |       1.03247
     -0.01420 |       0.00000 |       0.00386 |       0.00974 |       1.03279
     -0.01484 |       0.00000 |       0.00382 |       0.01029 |       1.03445
     -0.01593 |       0.00000 |       0.00373 |       0.01120 |       1.03048
     -0.01692 |       0.00000 |       0.00361 |       0.01129 |       1.03114
Evaluating losses...
     -0.01806 |       0.00000 |       0.00347 |       0.01016 |     

     -0.01288 |       0.00000 |       0.00096 |       0.00485 |       1.00266
     -0.01354 |       0.00000 |       0.00091 |       0.00495 |       1.00277
Evaluating losses...
     -0.01461 |       0.00000 |       0.00085 |       0.00507 |       1.00214
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.43        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2699         |
| TimeElapsed     | 8.13e+03     |
| TimestepsSoFar  | 6062080      |
| ev_tdlam_before | 0.14         |
| loss_ent        | 1.0021363    |
| loss_kl         | 0.005071141  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014611591 |
| loss_vf_loss    | 0.0008516395 |
----------------------------------
********** Iteration 1480 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00033 |       0.00000 |       0.00042 |       0.00153 |       1.00679
     -0.00636 |       0.00000 |       0.00017 

********** Iteration 1485 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00229 |       0.00000 |       0.00516 |       0.00457 |       0.97859
     -0.00421 |       0.00000 |       0.00379 |       0.00334 |       0.98114
     -0.00841 |       0.00000 |       0.00368 |       0.00474 |       0.97945
     -0.01051 |       0.00000 |       0.00355 |       0.00599 |       0.98172
     -0.01249 |       0.00000 |       0.00352 |       0.00665 |       0.98372
     -0.01291 |       0.00000 |       0.00341 |       0.00724 |       0.98280
     -0.01369 |       0.00000 |       0.00335 |       0.00779 |       0.98096
     -0.01447 |       0.00000 |       0.00329 |       0.00782 |       0.98055
     -0.01483 |       0.00000 |       0.00325 |       0.00769 |       0.98233
     -0.01522 |       0.00000 |       0.00324 |       0.00792 |       0.98254
Evaluating losses...
     -0.01604 |       0.00000 |       0.00332 |       0.00870 |     

     -0.01075 |       0.00000 |       0.00079 |       0.00608 |       1.04307
     -0.01085 |       0.00000 |       0.00075 |       0.00603 |       1.04290
Evaluating losses...
     -0.01208 |       0.00000 |       0.00072 |       0.00576 |       1.04094
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.45        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2714         |
| TimeElapsed     | 8.17e+03     |
| TimestepsSoFar  | 6107136      |
| ev_tdlam_before | -0.268       |
| loss_ent        | 1.0409353    |
| loss_kl         | 0.0057562646 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012078261 |
| loss_vf_loss    | 0.0007184694 |
----------------------------------
********** Iteration 1491 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00072 |       0.00000 |       0.00032 |       0.00251 |       1.01705
     -0.00509 |       0.00000 |       0.00018 

********** Iteration 1496 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00027 |       0.00000 |       0.00150 |       0.00416 |       0.99917
     -0.00619 |       0.00000 |       0.00139 |       0.00377 |       1.00398
     -0.00864 |       0.00000 |       0.00132 |       0.00366 |       1.00330
     -0.00920 |       0.00000 |       0.00128 |       0.00435 |       1.00250
     -0.01016 |       0.00000 |       0.00128 |       0.00522 |       1.00306
     -0.01001 |       0.00000 |       0.00125 |       0.00636 |       1.00202
     -0.01094 |       0.00000 |       0.00128 |       0.00706 |       1.00014
     -0.01161 |       0.00000 |       0.00123 |       0.00697 |       1.00148
     -0.01212 |       0.00000 |       0.00121 |       0.00687 |       1.00168
     -0.01284 |       0.00000 |       0.00120 |       0.00690 |       1.00183
Evaluating losses...
     -0.01342 |       0.00000 |       0.00116 |       0.00715 |     

     -0.01278 |       0.00000 |       0.00221 |       0.00504 |       0.98732
     -0.01342 |       0.00000 |       0.00208 |       0.00491 |       0.98813
Evaluating losses...
     -0.01395 |       0.00000 |       0.00201 |       0.00481 |       0.98708
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.5         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2729         |
| TimeElapsed     | 8.2e+03      |
| TimestepsSoFar  | 6152192      |
| ev_tdlam_before | 0.362        |
| loss_ent        | 0.9870769    |
| loss_kl         | 0.0048057023 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013952158 |
| loss_vf_loss    | 0.0020101066 |
----------------------------------
********** Iteration 1502 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00013 |       0.00000 |       0.00555 |       0.00096 |       1.00772
     -0.00571 |       0.00000 |       0.00372 

********** Iteration 1507 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00172 |       0.00000 |       0.00377 |       0.01248 |       0.98332
     -0.00554 |       0.00000 |       0.00327 |       0.01056 |       0.98333
     -0.00840 |       0.00000 |       0.00315 |       0.01035 |       0.97945
     -0.01033 |       0.00000 |       0.00310 |       0.00690 |       0.97585
     -0.01200 |       0.00000 |       0.00299 |       0.00810 |       0.97464
     -0.01300 |       0.00000 |       0.00293 |       0.00851 |       0.97374
     -0.01336 |       0.00000 |       0.00288 |       0.00945 |       0.97542
     -0.01424 |       0.00000 |       0.00283 |       0.00950 |       0.97558
     -0.01316 |       0.00000 |       0.00273 |       0.01266 |       0.97753
     -0.01517 |       0.00000 |       0.00268 |       0.01096 |       0.97831
Evaluating losses...
     -0.01626 |       0.00000 |       0.00260 |       0.01162 |     

     -0.01944 |       0.00000 |      4.23e-05 |       0.00739 |       1.02959
     -0.01973 |       0.00000 |      4.06e-05 |       0.00825 |       1.03092
Evaluating losses...
     -0.02227 |       0.00000 |      3.87e-05 |       0.00824 |       1.02998
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.54         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 2744          |
| TimeElapsed     | 8.23e+03      |
| TimestepsSoFar  | 6197248       |
| ev_tdlam_before | -1.51         |
| loss_ent        | 1.0299821     |
| loss_kl         | 0.008243887   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.022269752  |
| loss_vf_loss    | 3.8698145e-05 |
-----------------------------------
********** Iteration 1513 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00123 |       0.00000 |       0.00508 |       0.00190 |       0.98932
     -0.00680 |       0.00000 | 

********** Iteration 1518 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00060 |       0.00000 |       0.00150 |       0.00228 |       1.01915
     -0.00567 |       0.00000 |       0.00090 |       0.00507 |       1.01646
     -0.00874 |       0.00000 |       0.00079 |       0.00468 |       1.01748
     -0.01058 |       0.00000 |       0.00074 |       0.00445 |       1.01496
     -0.01052 |       0.00000 |       0.00071 |       0.00554 |       1.01336
     -0.01203 |       0.00000 |       0.00069 |       0.00530 |       1.01281
     -0.01278 |       0.00000 |       0.00066 |       0.00568 |       1.01478
     -0.01355 |       0.00000 |       0.00066 |       0.00574 |       1.01332
     -0.01419 |       0.00000 |       0.00064 |       0.00647 |       1.01397
     -0.01398 |       0.00000 |       0.00063 |       0.00735 |       1.01436
Evaluating losses...
     -0.01524 |       0.00000 |       0.00061 |       0.00739 |     

     -0.01239 |       0.00000 |       0.00090 |       0.00500 |       0.99173
     -0.01279 |       0.00000 |       0.00087 |       0.00485 |       0.99310
Evaluating losses...
     -0.01358 |       0.00000 |       0.00081 |       0.00527 |       0.99236
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.5         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2759         |
| TimeElapsed     | 8.27e+03     |
| TimestepsSoFar  | 6242304      |
| ev_tdlam_before | 0.397        |
| loss_ent        | 0.992364     |
| loss_kl         | 0.005274971  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013579946 |
| loss_vf_loss    | 0.000813993  |
----------------------------------
********** Iteration 1524 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00140 |       0.00000 |       0.00196 |       0.00266 |       1.06021
     -0.00248 |       0.00000 |       0.00131 

********** Iteration 1529 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00050 |       0.00000 |       0.00190 |       0.00357 |       0.96692
     -0.00635 |       0.00000 |       0.00179 |       0.00623 |       0.96827
     -0.00837 |       0.00000 |       0.00172 |       0.00469 |       0.96582
     -0.00856 |       0.00000 |       0.00162 |       0.00564 |       0.96905
     -0.00825 |       0.00000 |       0.00157 |       0.01329 |       0.97518
     -0.00998 |       0.00000 |       0.00153 |       0.00809 |       0.97188
     -0.01058 |       0.00000 |       0.00149 |       0.00710 |       0.97013
     -0.01144 |       0.00000 |       0.00148 |       0.00634 |       0.96892
     -0.01202 |       0.00000 |       0.00145 |       0.00591 |       0.96893
     -0.01228 |       0.00000 |       0.00142 |       0.00608 |       0.96823
Evaluating losses...
     -0.01296 |       0.00000 |       0.00135 |       0.00588 |     

     -0.01312 |       0.00000 |       0.00198 |       0.00506 |       0.99087
     -0.01360 |       0.00000 |       0.00193 |       0.00544 |       0.99041
     -0.01364 |       0.00000 |       0.00188 |       0.00595 |       0.99141
Evaluating losses...
     -0.01478 |       0.00000 |       0.00181 |       0.00621 |       0.99000
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.53        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2774         |
| TimeElapsed     | 8.46e+03     |
| TimestepsSoFar  | 6287360      |
| ev_tdlam_before | 0.395        |
| loss_ent        | 0.989997     |
| loss_kl         | 0.006211464  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014784603 |
| loss_vf_loss    | 0.0018117212 |
----------------------------------
********** Iteration 1535 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00297 |       0.00000 |       0.00349 

********** Iteration 1540 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00074 |       0.00000 |       0.00148 |       0.00154 |       0.98221
     -0.00552 |       0.00000 |       0.00116 |       0.00302 |       0.97943
     -0.00894 |       0.00000 |       0.00108 |       0.00301 |       0.98110
     -0.01054 |       0.00000 |       0.00101 |       0.00386 |       0.98129
     -0.01228 |       0.00000 |       0.00095 |       0.00369 |       0.98084
     -0.01294 |       0.00000 |       0.00091 |       0.00463 |       0.98142
     -0.01419 |       0.00000 |       0.00088 |       0.00494 |       0.98276
     -0.01458 |       0.00000 |       0.00082 |       0.00541 |       0.98097
     -0.01538 |       0.00000 |       0.00080 |       0.00552 |       0.98262
     -0.01557 |       0.00000 |       0.00077 |       0.00570 |       0.98234
Evaluating losses...
     -0.01729 |       0.00000 |       0.00072 |       0.00591 |     

     -0.01320 |       0.00000 |       0.00193 |       0.00521 |       0.97961
     -0.01369 |       0.00000 |       0.00191 |       0.00498 |       0.97748
Evaluating losses...
     -0.01437 |       0.00000 |       0.00193 |       0.00505 |       0.97957
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.49        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2789         |
| TimeElapsed     | 8.49e+03     |
| TimestepsSoFar  | 6332416      |
| ev_tdlam_before | 0.309        |
| loss_ent        | 0.97956544   |
| loss_kl         | 0.005050439  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014366479 |
| loss_vf_loss    | 0.0019327517 |
----------------------------------
********** Iteration 1546 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00014 |       0.00000 |       0.00186 |       0.00330 |       1.01341
     -0.00522 |       0.00000 |       0.00161 

********** Iteration 1551 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00122 |       0.00000 |      7.66e-05 |       0.00230 |       0.96592
     -0.00737 |       0.00000 |      5.51e-05 |       0.00258 |       0.96646
     -0.00991 |       0.00000 |      4.55e-05 |       0.00376 |       0.96615
     -0.01126 |       0.00000 |      3.96e-05 |       0.00416 |       0.96528
     -0.01181 |       0.00000 |      3.52e-05 |       0.00488 |       0.96465
     -0.01305 |       0.00000 |      3.23e-05 |       0.00544 |       0.96386
     -0.01412 |       0.00000 |      2.96e-05 |       0.00594 |       0.96327
     -0.01473 |       0.00000 |      2.77e-05 |       0.00634 |       0.96563
     -0.01531 |       0.00000 |      2.62e-05 |       0.00685 |       0.96279
     -0.01558 |       0.00000 |      2.50e-05 |       0.00751 |       0.96409
Evaluating losses...
     -0.01686 |       0.00000 |      2.38e-05 |       0.00736 |     

     -0.00852 |       0.00000 |       0.00144 |       0.00885 |       1.01310
     -0.00922 |       0.00000 |       0.00143 |       0.00657 |       1.01070
     -0.00989 |       0.00000 |       0.00143 |       0.00634 |       1.01257
Evaluating losses...
     -0.01023 |       0.00000 |       0.00136 |       0.00621 |       1.01391
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.47        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2804         |
| TimeElapsed     | 8.54e+03     |
| TimestepsSoFar  | 6377472      |
| ev_tdlam_before | 0.577        |
| loss_ent        | 1.0139067    |
| loss_kl         | 0.006212442  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010229732 |
| loss_vf_loss    | 0.0013579763 |
----------------------------------
********** Iteration 1557 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00245 |       0.00000 |       0.00462 

********** Iteration 1562 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00136 |       0.00000 |       0.00141 |       0.00251 |       0.98315
     -0.00507 |       0.00000 |       0.00099 |       0.00424 |       0.97908
     -0.00734 |       0.00000 |       0.00093 |       0.00419 |       0.98036
     -0.00872 |       0.00000 |       0.00090 |       0.00496 |       0.97785
     -0.00978 |       0.00000 |       0.00089 |       0.00587 |       0.98064
     -0.01110 |       0.00000 |       0.00088 |       0.00597 |       0.98243
     -0.01127 |       0.00000 |       0.00088 |       0.00690 |       0.98301
     -0.01225 |       0.00000 |       0.00086 |       0.00786 |       0.98348
     -0.01259 |       0.00000 |       0.00087 |       0.00755 |       0.98452
     -0.01305 |       0.00000 |       0.00085 |       0.00771 |       0.98714
Evaluating losses...
     -0.01421 |       0.00000 |       0.00085 |       0.00812 |     

     -0.01598 |       0.00000 |       0.00183 |       0.00747 |       0.98102
     -0.01611 |       0.00000 |       0.00182 |       0.00882 |       0.97547
     -0.01730 |       0.00000 |       0.00178 |       0.00733 |       0.97766
Evaluating losses...
     -0.01825 |       0.00000 |       0.00173 |       0.00756 |       0.97770
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.44        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2819         |
| TimeElapsed     | 8.58e+03     |
| TimestepsSoFar  | 6422528      |
| ev_tdlam_before | 0.593        |
| loss_ent        | 0.9776983    |
| loss_kl         | 0.007560108  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018248262 |
| loss_vf_loss    | 0.0017270277 |
----------------------------------
********** Iteration 1568 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00078 |       0.00000 |       0.00506 

********** Iteration 1573 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00311 |       0.00000 |       0.00029 |       0.00145 |       0.98265
     -0.00388 |       0.00000 |       0.00016 |       0.00211 |       0.98336
     -0.00708 |       0.00000 |       0.00012 |       0.00318 |       0.98385
     -0.00835 |       0.00000 |       0.00010 |       0.00322 |       0.98374
     -0.00983 |       0.00000 |      9.29e-05 |       0.00436 |       0.98621
     -0.01011 |       0.00000 |      8.57e-05 |       0.00462 |       0.98416
     -0.01087 |       0.00000 |      7.75e-05 |       0.00463 |       0.98649
     -0.01151 |       0.00000 |      7.27e-05 |       0.00513 |       0.98550
     -0.01194 |       0.00000 |      6.76e-05 |       0.00575 |       0.98599
     -0.01233 |       0.00000 |      6.49e-05 |       0.00599 |       0.98388
Evaluating losses...
     -0.01316 |       0.00000 |      6.03e-05 |       0.00633 |     

     -0.01535 |       0.00000 |       0.00123 |       0.00802 |       1.00269
     -0.01556 |       0.00000 |       0.00120 |       0.00807 |       1.00288
     -0.01609 |       0.00000 |       0.00119 |       0.00847 |       1.00261
Evaluating losses...
     -0.01651 |       0.00000 |       0.00112 |       0.00847 |       1.00213
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.43        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 2834         |
| TimeElapsed     | 8.61e+03     |
| TimestepsSoFar  | 6467584      |
| ev_tdlam_before | 0.442        |
| loss_ent        | 1.0021328    |
| loss_kl         | 0.008466791  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016513033 |
| loss_vf_loss    | 0.0011231337 |
----------------------------------
********** Iteration 1579 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00597 |       0.00000 |       0.00341 

********** Iteration 1584 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00036 |       0.00000 |       0.00048 |       0.00114 |       1.02808
     -0.00296 |       0.00000 |       0.00034 |       0.00167 |       1.02528
     -0.00480 |       0.00000 |       0.00024 |       0.00305 |       1.02364
     -0.00620 |       0.00000 |       0.00015 |       0.00289 |       1.02439
     -0.00713 |       0.00000 |      8.96e-05 |       0.00355 |       1.02333
     -0.00761 |       0.00000 |      5.96e-05 |       0.00395 |       1.02522
     -0.00827 |       0.00000 |      4.52e-05 |       0.00409 |       1.02363
     -0.00883 |       0.00000 |      4.28e-05 |       0.00467 |       1.02208
     -0.00921 |       0.00000 |      3.91e-05 |       0.00528 |       1.02076
     -0.00945 |       0.00000 |      3.76e-05 |       0.00527 |       1.02178
Evaluating losses...
     -0.01045 |       0.00000 |      3.70e-05 |       0.00533 |     

     -0.01349 |       0.00000 |       0.00289 |       0.00599 |       0.98181
     -0.01482 |       0.00000 |       0.00284 |       0.00574 |       0.98217
     -0.01531 |       0.00000 |       0.00279 |       0.00645 |       0.98347
     -0.01632 |       0.00000 |       0.00271 |       0.00682 |       0.98227
     -0.01661 |       0.00000 |       0.00268 |       0.00703 |       0.98270
Evaluating losses...
     -0.01848 |       0.00000 |       0.00258 |       0.00784 |       0.98245
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.42        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2849         |
| TimeElapsed     | 8.84e+03     |
| TimestepsSoFar  | 6512640      |
| ev_tdlam_before | 0.256        |
| loss_ent        | 0.9824536    |
| loss_kl         | 0.007838263  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.018476913 |
| loss_vf_loss    | 0.0025817107 |
----------------------------------
********** Iteration 

********** Iteration 1595 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00032 |       0.00000 |       0.00399 |       0.00324 |       0.98310
     -0.00720 |       0.00000 |       0.00359 |       0.00328 |       0.98837
     -0.00905 |       0.00000 |       0.00342 |       0.00372 |       0.98802
     -0.01087 |       0.00000 |       0.00336 |       0.00443 |       0.98718
     -0.01171 |       0.00000 |       0.00333 |       0.00478 |       0.98685
     -0.01284 |       0.00000 |       0.00324 |       0.00506 |       0.98702
     -0.01377 |       0.00000 |       0.00317 |       0.00545 |       0.98753
     -0.01435 |       0.00000 |       0.00315 |       0.00612 |       0.98676
     -0.01457 |       0.00000 |       0.00310 |       0.00717 |       0.98642
     -0.01493 |       0.00000 |       0.00310 |       0.00659 |       0.98657
Evaluating losses...
     -0.01616 |       0.00000 |       0.00297 |       0.00691 |     

     -0.01576 |       0.00000 |       0.00184 |       0.00704 |       0.94582
     -0.01654 |       0.00000 |       0.00176 |       0.00780 |       0.94782
Evaluating losses...
     -0.01749 |       0.00000 |       0.00170 |       0.00758 |       0.94657
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.41        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2864         |
| TimeElapsed     | 8.88e+03     |
| TimestepsSoFar  | 6557696      |
| ev_tdlam_before | 0.517        |
| loss_ent        | 0.94657063   |
| loss_kl         | 0.007584583  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.017490495 |
| loss_vf_loss    | 0.0016955574 |
----------------------------------
********** Iteration 1601 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00102 |       0.00000 |       0.00272 |       0.00329 |       0.97508
     -0.00409 |       0.00000 |       0.00192 

********** Iteration 1606 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00193 |       0.00000 |       0.00358 |       0.00427 |       0.95657
     -0.00613 |       0.00000 |       0.00225 |       0.00620 |       0.95465
     -0.01159 |       0.00000 |       0.00190 |       0.00497 |       0.95467
     -0.01374 |       0.00000 |       0.00177 |       0.00574 |       0.95291
     -0.01476 |       0.00000 |       0.00168 |       0.00697 |       0.95313
     -0.01624 |       0.00000 |       0.00160 |       0.00707 |       0.95044
     -0.01673 |       0.00000 |       0.00154 |       0.00773 |       0.95169
     -0.01738 |       0.00000 |       0.00154 |       0.00772 |       0.95069
     -0.01789 |       0.00000 |       0.00146 |       0.00891 |       0.95095
     -0.01842 |       0.00000 |       0.00146 |       0.00920 |       0.95225
Evaluating losses...
     -0.01941 |       0.00000 |       0.00133 |       0.00909 |     

     -0.01165 |       0.00000 |       0.00298 |       0.00660 |       0.97688
     -0.01307 |       0.00000 |       0.00286 |       0.00831 |       0.97345
     -0.01300 |       0.00000 |       0.00275 |       0.00805 |       0.97542
Evaluating losses...
     -0.01465 |       0.00000 |       0.00262 |       0.00734 |       0.97661
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.4         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2879         |
| TimeElapsed     | 8.93e+03     |
| TimestepsSoFar  | 6602752      |
| ev_tdlam_before | 0.496        |
| loss_ent        | 0.97660923   |
| loss_kl         | 0.0073399697 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014646696 |
| loss_vf_loss    | 0.002624538  |
----------------------------------
********** Iteration 1612 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00042 |       0.00000 |       0.00040 

********** Iteration 1617 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00104 |       0.00000 |       0.00022 |       0.00230 |       0.97466
     -0.00714 |       0.00000 |       0.00015 |       0.00361 |       0.97175
     -0.01128 |       0.00000 |       0.00012 |       0.00416 |       0.97086
     -0.01273 |       0.00000 |       0.00010 |       0.00457 |       0.97059
     -0.01422 |       0.00000 |      9.42e-05 |       0.00459 |       0.97049
     -0.01505 |       0.00000 |      8.57e-05 |       0.00500 |       0.96995
     -0.01564 |       0.00000 |      8.04e-05 |       0.00556 |       0.97175
     -0.01655 |       0.00000 |      7.84e-05 |       0.00549 |       0.96944
     -0.01721 |       0.00000 |      7.31e-05 |       0.00632 |       0.96879
     -0.01810 |       0.00000 |      7.06e-05 |       0.00622 |       0.96864
Evaluating losses...
     -0.01959 |       0.00000 |      6.84e-05 |       0.00620 |     

     -0.01991 |       0.00000 |       0.00014 |       0.00679 |       0.97239
     -0.02072 |       0.00000 |       0.00014 |       0.00760 |       0.97327
Evaluating losses...
     -0.02228 |       0.00000 |       0.00013 |       0.00690 |       0.97487
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.31         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2894          |
| TimeElapsed     | 8.97e+03      |
| TimestepsSoFar  | 6647808       |
| ev_tdlam_before | -0.614        |
| loss_ent        | 0.97487354    |
| loss_kl         | 0.006897909   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.022276912  |
| loss_vf_loss    | 0.00013432086 |
-----------------------------------
********** Iteration 1623 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00118 |       0.00000 |       0.00243 |       0.00119 |       0.93342
     -0.00526 |       0.00000 | 

********** Iteration 1628 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00096 |       0.00000 |       0.00214 |       0.00177 |       0.95459
     -0.00419 |       0.00000 |       0.00195 |       0.00765 |       0.95510
     -0.00929 |       0.00000 |       0.00180 |       0.00412 |       0.95036
     -0.01005 |       0.00000 |       0.00169 |       0.00400 |       0.94937
     -0.01086 |       0.00000 |       0.00159 |       0.00595 |       0.94713
     -0.01170 |       0.00000 |       0.00147 |       0.00509 |       0.94709
     -0.01226 |       0.00000 |       0.00140 |       0.00551 |       0.94743
     -0.01250 |       0.00000 |       0.00131 |       0.00537 |       0.94743
     -0.01288 |       0.00000 |       0.00125 |       0.00570 |       0.94788
     -0.01325 |       0.00000 |       0.00120 |       0.00559 |       0.94703
Evaluating losses...
     -0.01372 |       0.00000 |       0.00114 |       0.00551 |     

     -0.01229 |       0.00000 |       0.00144 |       0.00577 |       0.89412
     -0.01229 |       0.00000 |       0.00143 |       0.00608 |       0.89526
     -0.01306 |       0.00000 |       0.00136 |       0.00648 |       0.89391
Evaluating losses...
     -0.01344 |       0.00000 |       0.00128 |       0.00643 |       0.89234
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.37        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2909         |
| TimeElapsed     | 9.01e+03     |
| TimestepsSoFar  | 6692864      |
| ev_tdlam_before | 0.352        |
| loss_ent        | 0.8923446    |
| loss_kl         | 0.006433705  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01343538  |
| loss_vf_loss    | 0.0012755124 |
----------------------------------
********** Iteration 1634 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00177 |       0.00000 |       0.00051 

********** Iteration 1639 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00179 |       0.00000 |       0.00010 |       0.00370 |       0.95216
     -0.00748 |       0.00000 |      7.74e-05 |       0.00356 |       0.95246
     -0.01051 |       0.00000 |      6.74e-05 |       0.00381 |       0.95317
     -0.01298 |       0.00000 |      5.98e-05 |       0.00442 |       0.95283
     -0.01380 |       0.00000 |      5.61e-05 |       0.00493 |       0.95246
     -0.01564 |       0.00000 |      5.18e-05 |       0.00555 |       0.95347
     -0.01628 |       0.00000 |      4.87e-05 |       0.00546 |       0.95404
     -0.01732 |       0.00000 |      4.67e-05 |       0.00558 |       0.95355
     -0.01732 |       0.00000 |      4.37e-05 |       0.00640 |       0.95383
     -0.01827 |       0.00000 |      4.20e-05 |       0.00666 |       0.95512
Evaluating losses...
     -0.02005 |       0.00000 |      4.11e-05 |       0.00684 |     

     -0.00752 |       0.00000 |       0.00074 |       0.00506 |       0.96229
     -0.00856 |       0.00000 |       0.00068 |       0.00427 |       0.96455
     -0.00885 |       0.00000 |       0.00065 |       0.00439 |       0.96495
Evaluating losses...
     -0.00939 |       0.00000 |       0.00057 |       0.00427 |       0.96467
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.33         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 2924          |
| TimeElapsed     | 9.05e+03      |
| TimestepsSoFar  | 6737920       |
| ev_tdlam_before | 0.241         |
| loss_ent        | 0.9646677     |
| loss_kl         | 0.0042744423  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.009391474  |
| loss_vf_loss    | 0.00057337026 |
-----------------------------------
********** Iteration 1645 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00158 |       0.00000 | 

********** Iteration 1650 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00202 |       0.00000 |      5.03e-05 |       0.00260 |       0.96171
     -0.00609 |       0.00000 |      3.37e-05 |       0.00292 |       0.96058
     -0.00843 |       0.00000 |      2.75e-05 |       0.00330 |       0.96306
     -0.01030 |       0.00000 |      2.35e-05 |       0.00382 |       0.96372
     -0.01127 |       0.00000 |      2.10e-05 |       0.00375 |       0.96292
     -0.01206 |       0.00000 |      1.92e-05 |       0.00442 |       0.96372
     -0.01297 |       0.00000 |      1.72e-05 |       0.00457 |       0.96162
     -0.01361 |       0.00000 |      1.64e-05 |       0.00484 |       0.96464
     -0.01420 |       0.00000 |      1.56e-05 |       0.00534 |       0.96350
     -0.01468 |       0.00000 |      1.48e-05 |       0.00603 |       0.96425
Evaluating losses...
     -0.01591 |       0.00000 |      1.42e-05 |       0.00632 |     

     -0.01056 |       0.00000 |       0.00076 |       0.00436 |       0.93195
     -0.01096 |       0.00000 |       0.00075 |       0.00495 |       0.93241
Evaluating losses...
     -0.01154 |       0.00000 |       0.00072 |       0.00589 |       0.92861
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.33         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 2938          |
| TimeElapsed     | 9.29e+03      |
| TimestepsSoFar  | 6782976       |
| ev_tdlam_before | 0.577         |
| loss_ent        | 0.92860866    |
| loss_kl         | 0.0058925706  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.011539105  |
| loss_vf_loss    | 0.00072020595 |
-----------------------------------
********** Iteration 1656 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00103 |       0.00000 |       0.00014 |       0.00285 |       0.97970
     -0.00630 |       0.00000 | 

********** Iteration 1661 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00276 |       0.00000 |       0.00013 |       0.00151 |       0.96034
     -0.00278 |       0.00000 |      6.42e-05 |       0.00271 |       0.95943
     -0.00541 |       0.00000 |      5.28e-05 |       0.00314 |       0.96224
     -0.00700 |       0.00000 |      4.66e-05 |       0.00369 |       0.96103
     -0.00735 |       0.00000 |      4.34e-05 |       0.00430 |       0.96434
     -0.00780 |       0.00000 |      4.06e-05 |       0.00506 |       0.96268
     -0.00893 |       0.00000 |      3.88e-05 |       0.00543 |       0.96245
     -0.00901 |       0.00000 |      3.74e-05 |       0.00562 |       0.96227
     -0.01036 |       0.00000 |      3.66e-05 |       0.00645 |       0.96182
     -0.01116 |       0.00000 |      3.56e-05 |       0.00581 |       0.96357
Evaluating losses...
     -0.01335 |       0.00000 |      3.68e-05 |       0.00547 |     

     -0.01313 |       0.00000 |      8.24e-05 |       0.00679 |       0.95980
     -0.01346 |       0.00000 |      7.79e-05 |       0.00670 |       0.95799
     -0.01395 |       0.00000 |      7.54e-05 |       0.00753 |       0.95914
Evaluating losses...
     -0.01475 |       0.00000 |      7.52e-05 |       0.00717 |       0.95612
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.34        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2954         |
| TimeElapsed     | 9.33e+03     |
| TimestepsSoFar  | 6828032      |
| ev_tdlam_before | -2.17        |
| loss_ent        | 0.9561225    |
| loss_kl         | 0.00716576   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014754717 |
| loss_vf_loss    | 7.520223e-05 |
----------------------------------
********** Iteration 1667 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00197 |       0.00000 |       0.00309 

********** Iteration 1672 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00168 |       0.00000 |       0.00427 |       0.00269 |       0.98831
     -0.00760 |       0.00000 |       0.00377 |       0.00342 |       0.98766
     -0.00968 |       0.00000 |       0.00348 |       0.00413 |       0.99261
     -0.01162 |       0.00000 |       0.00333 |       0.00440 |       0.99167
     -0.01280 |       0.00000 |       0.00320 |       0.00458 |       0.99166
     -0.01377 |       0.00000 |       0.00307 |       0.00537 |       0.99431
     -0.01232 |       0.00000 |       0.00298 |       0.00576 |       0.99184
     -0.01478 |       0.00000 |       0.00288 |       0.00562 |       0.99234
     -0.01523 |       0.00000 |       0.00274 |       0.00607 |       0.99287
     -0.01582 |       0.00000 |       0.00256 |       0.00636 |       0.99285
Evaluating losses...
     -0.01628 |       0.00000 |       0.00243 |       0.00660 |     

     -0.01196 |       0.00000 |       0.00142 |       0.00690 |       0.94347
     -0.01246 |       0.00000 |       0.00140 |       0.00723 |       0.94385
     -0.01359 |       0.00000 |       0.00139 |       0.00830 |       0.94238
Evaluating losses...
     -0.01491 |       0.00000 |       0.00131 |       0.00815 |       0.94231
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.34        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2969         |
| TimeElapsed     | 9.37e+03     |
| TimestepsSoFar  | 6873088      |
| ev_tdlam_before | 0.697        |
| loss_ent        | 0.9423123    |
| loss_kl         | 0.008153465  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014913794 |
| loss_vf_loss    | 0.0013082572 |
----------------------------------
********** Iteration 1678 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00026 |       0.00000 |       0.00831 

********** Iteration 1683 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00622 |       0.00000 |       0.00643 |       0.00200 |       0.90654
     -0.00175 |       0.00000 |       0.00467 |       0.00316 |       0.90717
     -0.00609 |       0.00000 |       0.00393 |       0.00250 |       0.90611
     -0.00656 |       0.00000 |       0.00355 |       0.00296 |       0.90745
     -0.00813 |       0.00000 |       0.00326 |       0.00359 |       0.90637
     -0.01005 |       0.00000 |       0.00308 |       0.00430 |       0.90616
     -0.00984 |       0.00000 |       0.00296 |       0.00475 |       0.90920
     -0.01087 |       0.00000 |       0.00288 |       0.00546 |       0.90793
     -0.01180 |       0.00000 |       0.00281 |       0.00578 |       0.90537
     -0.01195 |       0.00000 |       0.00272 |       0.00634 |       0.90729
Evaluating losses...
     -0.01413 |       0.00000 |       0.00261 |       0.00598 |     

     -0.01323 |       0.00000 |       0.00118 |       0.00649 |       0.96532
     -0.01368 |       0.00000 |       0.00118 |       0.00613 |       0.96554
Evaluating losses...
     -0.01540 |       0.00000 |       0.00112 |       0.00643 |       0.96398
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.27        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2984         |
| TimeElapsed     | 9.42e+03     |
| TimestepsSoFar  | 6918144      |
| ev_tdlam_before | 0.295        |
| loss_ent        | 0.96397996   |
| loss_kl         | 0.0064341985 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015403687 |
| loss_vf_loss    | 0.001117989  |
----------------------------------
********** Iteration 1689 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00161 |       0.00000 |       0.00222 |       0.00106 |       0.94884
     -0.00286 |       0.00000 |       0.00107 

********** Iteration 1694 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00026 |       0.00000 |       0.00076 |       0.00360 |       0.90503
     -0.00745 |       0.00000 |       0.00045 |       0.00288 |       0.89949
     -0.01053 |       0.00000 |       0.00036 |       0.00355 |       0.89983
     -0.01263 |       0.00000 |       0.00032 |       0.00454 |       0.89763
     -0.01383 |       0.00000 |       0.00029 |       0.00516 |       0.89927
     -0.01540 |       0.00000 |       0.00027 |       0.00529 |       0.89828
     -0.01623 |       0.00000 |       0.00026 |       0.00578 |       0.90001
     -0.01724 |       0.00000 |       0.00025 |       0.00623 |       0.89861
     -0.01836 |       0.00000 |       0.00024 |       0.00660 |       0.89929
     -0.01857 |       0.00000 |       0.00023 |       0.00693 |       0.90068
Evaluating losses...
     -0.02034 |       0.00000 |       0.00023 |       0.00721 |     

     -0.01320 |       0.00000 |       0.00219 |       0.00583 |       0.89212
     -0.01411 |       0.00000 |       0.00215 |       0.00542 |       0.88971
Evaluating losses...
     -0.01496 |       0.00000 |       0.00210 |       0.00574 |       0.89223
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.35        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 2999         |
| TimeElapsed     | 9.46e+03     |
| TimestepsSoFar  | 6963200      |
| ev_tdlam_before | 0.3          |
| loss_ent        | 0.8922329    |
| loss_kl         | 0.005744971  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014955383 |
| loss_vf_loss    | 0.002104219  |
----------------------------------
********** Iteration 1700 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00105 |       0.00000 |       0.00183 |       0.00212 |       0.95728
     -0.00595 |       0.00000 |       0.00164 

********** Iteration 1705 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00027 |       0.00000 |       0.01132 |       0.00267 |       0.85566
     -0.00628 |       0.00000 |       0.00808 |       0.00585 |       0.85678
     -0.00991 |       0.00000 |       0.00715 |       0.00492 |       0.85641
     -0.01112 |       0.00000 |       0.00682 |       0.00583 |       0.85871
     -0.01245 |       0.00000 |       0.00647 |       0.00622 |       0.85866
     -0.01372 |       0.00000 |       0.00626 |       0.00661 |       0.85860
     -0.01381 |       0.00000 |       0.00609 |       0.00763 |       0.85779
     -0.01567 |       0.00000 |       0.00597 |       0.00771 |       0.85774
     -0.01606 |       0.00000 |       0.00585 |       0.00828 |       0.85899
     -0.01662 |       0.00000 |       0.00573 |       0.00850 |       0.85915
Evaluating losses...
     -0.01827 |       0.00000 |       0.00543 |       0.00867 |     

     -0.01079 |       0.00000 |       0.00119 |       0.00413 |       0.91214
     -0.01106 |       0.00000 |       0.00117 |       0.00418 |       0.91194
     -0.01137 |       0.00000 |       0.00117 |       0.00462 |       0.91099
     -0.01158 |       0.00000 |       0.00115 |       0.00435 |       0.91214
Evaluating losses...
     -0.01227 |       0.00000 |       0.00114 |       0.00428 |       0.91212
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.32        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3013         |
| TimeElapsed     | 9.72e+03     |
| TimestepsSoFar  | 7008256      |
| ev_tdlam_before | 0.145        |
| loss_ent        | 0.9121183    |
| loss_kl         | 0.004283887  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012274715 |
| loss_vf_loss    | 0.0011404    |
----------------------------------
********** Iteration 1711 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 1716 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00121 |       0.00000 |       0.00228 |       0.00326 |       0.95143
     -0.00489 |       0.00000 |       0.00109 |       0.00319 |       0.94967
     -0.00789 |       0.00000 |       0.00090 |       0.00375 |       0.94792
     -0.00799 |       0.00000 |       0.00078 |       0.00454 |       0.95033
     -0.00907 |       0.00000 |       0.00069 |       0.00410 |       0.95016
     -0.00902 |       0.00000 |       0.00060 |       0.00463 |       0.94927
     -0.01044 |       0.00000 |       0.00055 |       0.00484 |       0.94840
     -0.00911 |       0.00000 |       0.00051 |       0.00552 |       0.95044
     -0.01178 |       0.00000 |       0.00049 |       0.00579 |       0.94827
     -0.01156 |       0.00000 |       0.00045 |       0.00557 |       0.94948
Evaluating losses...
     -0.01299 |       0.00000 |       0.00044 |       0.00561 |     

     -0.01146 |       0.00000 |      8.73e-05 |       0.00588 |       0.92425
     -0.01349 |       0.00000 |      8.36e-05 |       0.00618 |       0.92489
     -0.01065 |       0.00000 |      8.28e-05 |       0.00922 |       0.92711
Evaluating losses...
     -0.01443 |       0.00000 |      7.94e-05 |       0.00759 |       0.92455
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.35        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3028         |
| TimeElapsed     | 9.76e+03     |
| TimestepsSoFar  | 7053312      |
| ev_tdlam_before | -5.55        |
| loss_ent        | 0.9245459    |
| loss_kl         | 0.0075910552 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014427885 |
| loss_vf_loss    | 7.938807e-05 |
----------------------------------
********** Iteration 1722 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00041 |       0.00000 |       0.00197 

********** Iteration 1727 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00057 |       0.00000 |       0.00218 |       0.00192 |       0.94671
     -0.00631 |       0.00000 |       0.00182 |       0.00253 |       0.94577
     -0.00775 |       0.00000 |       0.00167 |       0.00277 |       0.94814
     -0.00914 |       0.00000 |       0.00152 |       0.00366 |       0.94546
     -0.01091 |       0.00000 |       0.00143 |       0.00400 |       0.94673
     -0.01203 |       0.00000 |       0.00136 |       0.00393 |       0.94633
     -0.01281 |       0.00000 |       0.00134 |       0.00405 |       0.94518
     -0.01321 |       0.00000 |       0.00129 |       0.00459 |       0.94527
     -0.01337 |       0.00000 |       0.00126 |       0.00485 |       0.94702
     -0.01452 |       0.00000 |       0.00123 |       0.00521 |       0.94572
Evaluating losses...
     -0.01571 |       0.00000 |       0.00122 |       0.00521 |     

     -0.01251 |       0.00000 |       0.00217 |       0.00722 |       0.88770
     -0.01240 |       0.00000 |       0.00213 |       0.00749 |       0.88798
     -0.01300 |       0.00000 |       0.00212 |       0.00758 |       0.88574
Evaluating losses...
     -0.01355 |       0.00000 |       0.00200 |       0.00804 |       0.88745
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.33        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3043         |
| TimeElapsed     | 9.81e+03     |
| TimestepsSoFar  | 7098368      |
| ev_tdlam_before | 0.531        |
| loss_ent        | 0.8874535    |
| loss_kl         | 0.008037321  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013552457 |
| loss_vf_loss    | 0.002002794  |
----------------------------------
********** Iteration 1733 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00019 |       0.00000 |       0.00018 

********** Iteration 1738 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00059 |       0.00000 |       0.00018 |       0.00132 |       0.89449
     -0.00441 |       0.00000 |      7.25e-05 |       0.00231 |       0.89485
     -0.00611 |       0.00000 |      5.81e-05 |       0.00345 |       0.89444
     -0.00737 |       0.00000 |      5.45e-05 |       0.00370 |       0.89627
     -0.00808 |       0.00000 |      5.03e-05 |       0.00432 |       0.89835
     -0.00884 |       0.00000 |      4.70e-05 |       0.00520 |       0.89600
     -0.00906 |       0.00000 |      4.61e-05 |       0.00568 |       0.89698
     -0.00954 |       0.00000 |      4.31e-05 |       0.00629 |       0.89658
     -0.01041 |       0.00000 |      4.29e-05 |       0.00700 |       0.89839
     -0.01083 |       0.00000 |      4.13e-05 |       0.00714 |       0.89772
Evaluating losses...
     -0.01173 |       0.00000 |      4.10e-05 |       0.00721 |     

     -0.01130 |       0.00000 |       0.00055 |       0.00338 |       0.89571
     -0.01093 |       0.00000 |       0.00054 |       0.00401 |       0.89562
     -0.01189 |       0.00000 |       0.00052 |       0.00375 |       0.89575
Evaluating losses...
     -0.01219 |       0.00000 |       0.00049 |       0.00383 |       0.89586
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.27         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3058          |
| TimeElapsed     | 9.85e+03      |
| TimestepsSoFar  | 7143424       |
| ev_tdlam_before | 0.193         |
| loss_ent        | 0.89585793    |
| loss_kl         | 0.0038267432  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.012188872  |
| loss_vf_loss    | 0.00048500777 |
-----------------------------------
********** Iteration 1744 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00457 |       0.00000 | 

********** Iteration 1749 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00126 |       0.00000 |       0.00269 |       0.00128 |       0.90841
     -0.00436 |       0.00000 |       0.00185 |       0.00310 |       0.90684
     -0.00692 |       0.00000 |       0.00162 |       0.00354 |       0.90761
     -0.00719 |       0.00000 |       0.00147 |       0.00373 |       0.90506
     -0.00878 |       0.00000 |       0.00132 |       0.00382 |       0.90608
     -0.00925 |       0.00000 |       0.00103 |       0.00405 |       0.90691
     -0.00965 |       0.00000 |       0.00076 |       0.00429 |       0.90673
     -0.01004 |       0.00000 |       0.00074 |       0.00476 |       0.90640
     -0.01037 |       0.00000 |       0.00070 |       0.00521 |       0.90831
     -0.01081 |       0.00000 |       0.00066 |       0.00526 |       0.90524
Evaluating losses...
     -0.01190 |       0.00000 |       0.00061 |       0.00502 |     

     -0.01229 |       0.00000 |       0.00037 |       0.00525 |       0.92964
     -0.01242 |       0.00000 |       0.00036 |       0.00612 |       0.92948
     -0.01316 |       0.00000 |       0.00034 |       0.00594 |       0.92985
Evaluating losses...
     -0.01417 |       0.00000 |       0.00032 |       0.00655 |       0.92878
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.24        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3073         |
| TimeElapsed     | 9.89e+03     |
| TimestepsSoFar  | 7188480      |
| ev_tdlam_before | 0.78         |
| loss_ent        | 0.92877585   |
| loss_kl         | 0.0065468177 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014170306 |
| loss_vf_loss    | 0.0003211604 |
----------------------------------
********** Iteration 1755 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00073 |       0.00000 |       0.00031 

********** Iteration 1760 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00164 |       0.00000 |      7.37e-05 |       0.00254 |       0.93698
     -0.00703 |       0.00000 |      5.02e-05 |       0.00401 |       0.93911
     -0.01106 |       0.00000 |      3.80e-05 |       0.00375 |       0.93859
     -0.01281 |       0.00000 |      3.28e-05 |       0.00442 |       0.94027
     -0.01407 |       0.00000 |      2.92e-05 |       0.00504 |       0.94243
     -0.01554 |       0.00000 |      2.75e-05 |       0.00556 |       0.94155
     -0.01639 |       0.00000 |      2.51e-05 |       0.00640 |       0.94311
     -0.01738 |       0.00000 |      2.31e-05 |       0.00664 |       0.94141
     -0.01803 |       0.00000 |      2.18e-05 |       0.00697 |       0.94163
     -0.01780 |       0.00000 |      2.09e-05 |       0.00793 |       0.94157
Evaluating losses...
     -0.01921 |       0.00000 |      1.98e-05 |       0.00867 |     

     -0.01162 |       0.00000 |       0.00160 |       0.00595 |       0.92912
     -0.01181 |       0.00000 |       0.00155 |       0.00661 |       0.92936
     -0.01250 |       0.00000 |       0.00151 |       0.00711 |       0.93106
Evaluating losses...
     -0.01283 |       0.00000 |       0.00147 |       0.00728 |       0.93241
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.31        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3088         |
| TimeElapsed     | 9.94e+03     |
| TimestepsSoFar  | 7233536      |
| ev_tdlam_before | -0.0709      |
| loss_ent        | 0.9324129    |
| loss_kl         | 0.007275578  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012825202 |
| loss_vf_loss    | 0.0014672932 |
----------------------------------
********** Iteration 1766 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00131 |       0.00000 |       0.00130 

********** Iteration 1771 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00054 |       0.00000 |       0.00290 |       0.00286 |       0.92781
     -0.00425 |       0.00000 |       0.00203 |       0.00418 |       0.92405
     -0.00703 |       0.00000 |       0.00188 |       0.00390 |       0.92578
     -0.00785 |       0.00000 |       0.00174 |       0.00437 |       0.92722
     -0.00934 |       0.00000 |       0.00162 |       0.00369 |       0.92998
     -0.01021 |       0.00000 |       0.00146 |       0.00396 |       0.92954
     -0.01104 |       0.00000 |       0.00138 |       0.00398 |       0.93055
     -0.01084 |       0.00000 |       0.00133 |       0.00465 |       0.93096
     -0.01153 |       0.00000 |       0.00128 |       0.00475 |       0.92849
     -0.01167 |       0.00000 |       0.00128 |       0.00481 |       0.93087
Evaluating losses...
     -0.01305 |       0.00000 |       0.00121 |       0.00473 |     

     -0.01459 |       0.00000 |       0.00167 |       0.00684 |       0.96766
     -0.01538 |       0.00000 |       0.00165 |       0.00691 |       0.96801
Evaluating losses...
     -0.01688 |       0.00000 |       0.00159 |       0.00738 |       0.96902
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.28        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3103         |
| TimeElapsed     | 1.02e+04     |
| TimestepsSoFar  | 7278592      |
| ev_tdlam_before | 0.555        |
| loss_ent        | 0.96902376   |
| loss_kl         | 0.00737751   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01688443  |
| loss_vf_loss    | 0.0015863526 |
----------------------------------
********** Iteration 1777 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00015 |       0.00000 |       0.00258 |       0.00081 |       0.92677
     -0.00597 |       0.00000 |       0.00236 

********** Iteration 1782 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00052 |       0.00000 |       0.00191 |       0.00159 |       0.93299
     -0.00589 |       0.00000 |       0.00089 |       0.00288 |       0.93190
     -0.00943 |       0.00000 |       0.00075 |       0.00329 |       0.92949
     -0.01125 |       0.00000 |       0.00067 |       0.00423 |       0.92969
     -0.01117 |       0.00000 |       0.00063 |       0.00543 |       0.93049
     -0.01309 |       0.00000 |       0.00060 |       0.00540 |       0.92811
     -0.01347 |       0.00000 |       0.00057 |       0.00649 |       0.92853
     -0.01418 |       0.00000 |       0.00055 |       0.00642 |       0.93120
     -0.01492 |       0.00000 |       0.00054 |       0.00689 |       0.93026
     -0.01563 |       0.00000 |       0.00053 |       0.00735 |       0.92847
Evaluating losses...
     -0.01692 |       0.00000 |       0.00051 |       0.00734 |     

     -0.01484 |       0.00000 |      5.87e-05 |       0.00473 |       0.94882
     -0.01537 |       0.00000 |      5.47e-05 |       0.00486 |       0.94838
     -0.01564 |       0.00000 |      5.04e-05 |       0.00553 |       0.94640
     -0.01630 |       0.00000 |      4.71e-05 |       0.00585 |       0.94785
Evaluating losses...
     -0.01811 |       0.00000 |      4.55e-05 |       0.00595 |       0.94600
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.24         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3118          |
| TimeElapsed     | 1.02e+04      |
| TimestepsSoFar  | 7323648       |
| ev_tdlam_before | -1.81         |
| loss_ent        | 0.94600445    |
| loss_kl         | 0.0059462674  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.018113716  |
| loss_vf_loss    | 4.5486515e-05 |
-----------------------------------
********** Iteration 1788 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 1793 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00130 |       0.00000 |       0.00401 |       0.00878 |       0.96305
     -0.00742 |       0.00000 |       0.00281 |       0.00578 |       0.96550
     -0.00906 |       0.00000 |       0.00277 |       0.00555 |       0.96730
     -0.01055 |       0.00000 |       0.00273 |       0.00803 |       0.96600
     -0.01123 |       0.00000 |       0.00270 |       0.00812 |       0.96498
     -0.01142 |       0.00000 |       0.00265 |       0.00838 |       0.96467
     -0.01306 |       0.00000 |       0.00263 |       0.00796 |       0.96419
     -0.01350 |       0.00000 |       0.00263 |       0.00761 |       0.96434
     -0.01355 |       0.00000 |       0.00262 |       0.00900 |       0.96287
     -0.01424 |       0.00000 |       0.00260 |       0.00864 |       0.96313
Evaluating losses...
     -0.01519 |       0.00000 |       0.00257 |       0.00887 |     

     -0.01179 |       0.00000 |       0.00134 |       0.00489 |       0.96737
     -0.01167 |       0.00000 |       0.00132 |       0.00510 |       0.96649
Evaluating losses...
     -0.01255 |       0.00000 |       0.00127 |       0.00524 |       0.96809
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.28         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3133          |
| TimeElapsed     | 1.03e+04      |
| TimestepsSoFar  | 7368704       |
| ev_tdlam_before | -0.0118       |
| loss_ent        | 0.96809214    |
| loss_kl         | 0.0052395104  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0125454655 |
| loss_vf_loss    | 0.0012735564  |
-----------------------------------
********** Iteration 1799 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00165 |       0.00000 |       0.00022 |       0.00202 |       0.95383
     -0.00477 |       0.00000 | 

********** Iteration 1804 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00330 |       0.00000 |       0.00118 |       0.00238 |       0.92293
     -0.00896 |       0.00000 |       0.00094 |       0.00591 |       0.92647
     -0.00980 |       0.00000 |       0.00089 |       0.00599 |       0.92415
     -0.01222 |       0.00000 |       0.00087 |       0.00597 |       0.92205
     -0.01311 |       0.00000 |       0.00086 |       0.00605 |       0.92251
     -0.01354 |       0.00000 |       0.00086 |       0.00609 |       0.92078
     -0.01360 |       0.00000 |       0.00085 |       0.00600 |       0.92260
     -0.01435 |       0.00000 |       0.00083 |       0.00613 |       0.92122
     -0.01408 |       0.00000 |       0.00082 |       0.00630 |       0.92026
     -0.01474 |       0.00000 |       0.00081 |       0.00637 |       0.92135
Evaluating losses...
     -0.01536 |       0.00000 |       0.00078 |       0.00642 |     

     -0.01385 |       0.00000 |       0.00525 |       0.00582 |       0.91900
     -0.01469 |       0.00000 |       0.00502 |       0.00574 |       0.91959
     -0.01495 |       0.00000 |       0.00492 |       0.00612 |       0.91781
Evaluating losses...
     -0.01628 |       0.00000 |       0.00470 |       0.00596 |       0.91789
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.3         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3148         |
| TimeElapsed     | 1.03e+04     |
| TimestepsSoFar  | 7413760      |
| ev_tdlam_before | 0.541        |
| loss_ent        | 0.91788787   |
| loss_kl         | 0.0059610424 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.016279094 |
| loss_vf_loss    | 0.0046978462 |
----------------------------------
********** Iteration 1810 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00263 |       0.00000 |       0.00083 

********** Iteration 1815 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00164 |       0.00000 |       0.00093 |       0.00142 |       0.90152
     -0.00206 |       0.00000 |       0.00046 |       0.00182 |       0.90000
     -0.00356 |       0.00000 |       0.00038 |       0.00194 |       0.90149
     -0.00459 |       0.00000 |       0.00038 |       0.00254 |       0.89981
     -0.00531 |       0.00000 |       0.00035 |       0.00299 |       0.89986
     -0.00601 |       0.00000 |       0.00036 |       0.00286 |       0.90101
     -0.00646 |       0.00000 |       0.00033 |       0.00308 |       0.90101
     -0.00680 |       0.00000 |       0.00034 |       0.00335 |       0.90046
     -0.00723 |       0.00000 |       0.00033 |       0.00387 |       0.89929
     -0.00736 |       0.00000 |       0.00031 |       0.00388 |       0.90214
Evaluating losses...
     -0.00866 |       0.00000 |       0.00030 |       0.00420 |     

     -0.01253 |       0.00000 |       0.00244 |       0.00664 |       0.96676
     -0.01288 |       0.00000 |       0.00235 |       0.00673 |       0.96664
     -0.01311 |       0.00000 |       0.00225 |       0.00716 |       0.96680
Evaluating losses...
     -0.01378 |       0.00000 |       0.00213 |       0.00709 |       0.96654
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.26        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3163         |
| TimeElapsed     | 1.03e+04     |
| TimestepsSoFar  | 7458816      |
| ev_tdlam_before | 0.399        |
| loss_ent        | 0.96654284   |
| loss_kl         | 0.007085689  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013779685 |
| loss_vf_loss    | 0.0021313669 |
----------------------------------
********** Iteration 1821 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00027 |       0.00000 |       0.00218 

********** Iteration 1826 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00094 |       0.00000 |       0.00184 |       0.00558 |       0.90516
     -0.00299 |       0.00000 |       0.00152 |       0.00406 |       0.90215
     -0.00451 |       0.00000 |       0.00139 |       0.00611 |       0.90001
     -0.00503 |       0.00000 |       0.00132 |       0.00578 |       0.90700
     -0.00633 |       0.00000 |       0.00127 |       0.00430 |       0.90439
     -0.00705 |       0.00000 |       0.00127 |       0.00433 |       0.90590
     -0.00785 |       0.00000 |       0.00119 |       0.00429 |       0.90768
     -0.00865 |       0.00000 |       0.00117 |       0.00421 |       0.90663
     -0.00962 |       0.00000 |       0.00114 |       0.00424 |       0.90797
     -0.01010 |       0.00000 |       0.00113 |       0.00454 |       0.90726
Evaluating losses...
     -0.01090 |       0.00000 |       0.00107 |       0.00465 |     

     -0.01197 |       0.00000 |       0.00179 |       0.00433 |       0.91232
     -0.01212 |       0.00000 |       0.00179 |       0.00459 |       0.91209
     -0.01295 |       0.00000 |       0.00178 |       0.00462 |       0.91150
     -0.01266 |       0.00000 |       0.00177 |       0.00493 |       0.91168
Evaluating losses...
     -0.01365 |       0.00000 |       0.00181 |       0.00561 |       0.90811
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.31        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3178         |
| TimeElapsed     | 1.06e+04     |
| TimestepsSoFar  | 7503872      |
| ev_tdlam_before | 0.237        |
| loss_ent        | 0.9081081    |
| loss_kl         | 0.005613587  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013649604 |
| loss_vf_loss    | 0.0018127251 |
----------------------------------
********** Iteration 1832 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 1837 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00191 |       0.00000 |       0.00300 |       0.00120 |       0.95023
     -0.00426 |       0.00000 |       0.00210 |       0.00379 |       0.95031
     -0.00621 |       0.00000 |       0.00188 |       0.00346 |       0.95510
     -0.00774 |       0.00000 |       0.00176 |       0.00412 |       0.95533
     -0.00864 |       0.00000 |       0.00165 |       0.00452 |       0.95483
     -0.00923 |       0.00000 |       0.00159 |       0.00483 |       0.95497
     -0.00991 |       0.00000 |       0.00150 |       0.00499 |       0.95432
     -0.00948 |       0.00000 |       0.00146 |       0.00538 |       0.95525
     -0.01035 |       0.00000 |       0.00144 |       0.00555 |       0.95489
     -0.01054 |       0.00000 |       0.00135 |       0.00572 |       0.95505
Evaluating losses...
     -0.01138 |       0.00000 |       0.00131 |       0.00578 |     

     -0.01106 |       0.00000 |       0.00111 |       0.00683 |       0.91107
     -0.01223 |       0.00000 |       0.00102 |       0.00746 |       0.91227
     -0.01296 |       0.00000 |       0.00101 |       0.00717 |       0.91302
Evaluating losses...
     -0.01352 |       0.00000 |       0.00095 |       0.00740 |       0.91379
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | -0.17         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 3193          |
| TimeElapsed     | 1.06e+04      |
| TimestepsSoFar  | 7548928       |
| ev_tdlam_before | 0.582         |
| loss_ent        | 0.91379064    |
| loss_kl         | 0.0073973276  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.013521457  |
| loss_vf_loss    | 0.00094575644 |
-----------------------------------
********** Iteration 1843 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00299 |       0.00000 | 

********** Iteration 1848 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00275 |       0.00000 |       0.00052 |       0.00166 |       0.94428
     -0.00638 |       0.00000 |       0.00028 |       0.00224 |       0.94236
     -0.00945 |       0.00000 |       0.00023 |       0.00312 |       0.94213
     -0.01123 |       0.00000 |       0.00020 |       0.00311 |       0.94040
     -0.01205 |       0.00000 |       0.00018 |       0.00434 |       0.94122
     -0.01336 |       0.00000 |       0.00017 |       0.00481 |       0.93853
     -0.01423 |       0.00000 |       0.00016 |       0.00524 |       0.94022
     -0.01587 |       0.00000 |       0.00016 |       0.00488 |       0.93891
     -0.01562 |       0.00000 |       0.00015 |       0.00558 |       0.94001
     -0.01698 |       0.00000 |       0.00015 |       0.00600 |       0.94082
Evaluating losses...
     -0.01948 |       0.00000 |       0.00014 |       0.00554 |     

     -0.01105 |       0.00000 |       0.00119 |       0.00429 |       0.95131
     -0.01141 |       0.00000 |       0.00117 |       0.00454 |       0.95154
     -0.01150 |       0.00000 |       0.00116 |       0.00467 |       0.94962
Evaluating losses...
     -0.01237 |       0.00000 |       0.00114 |       0.00492 |       0.95067
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.24        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 3208         |
| TimeElapsed     | 1.07e+04     |
| TimestepsSoFar  | 7593984      |
| ev_tdlam_before | 0.784        |
| loss_ent        | 0.9506716    |
| loss_kl         | 0.0049214843 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012371623 |
| loss_vf_loss    | 0.0011355436 |
----------------------------------
********** Iteration 1854 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00094 |       0.00000 |       0.00255 

********** Iteration 1859 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00024 |       0.00000 |       0.00208 |       0.00251 |       0.94941
     -0.00531 |       0.00000 |       0.00124 |       0.00363 |       0.95247
     -0.00578 |       0.00000 |       0.00107 |       0.00455 |       0.95012
     -0.00889 |       0.00000 |       0.00103 |       0.00372 |       0.95275
     -0.01005 |       0.00000 |       0.00100 |       0.00370 |       0.94925
     -0.01066 |       0.00000 |       0.00101 |       0.00419 |       0.94925
     -0.01092 |       0.00000 |       0.00096 |       0.00449 |       0.95161
     -0.01168 |       0.00000 |       0.00094 |       0.00439 |       0.94879
     -0.01173 |       0.00000 |       0.00092 |       0.00502 |       0.95004
     -0.01236 |       0.00000 |       0.00091 |       0.00530 |       0.95081
Evaluating losses...
     -0.01324 |       0.00000 |       0.00091 |       0.00529 |     

     -0.01182 |       0.00000 |       0.00026 |       0.00617 |       0.92058
     -0.01261 |       0.00000 |       0.00025 |       0.00591 |       0.92004
     -0.01380 |       0.00000 |       0.00024 |       0.00620 |       0.91870
Evaluating losses...
     -0.01554 |       0.00000 |       0.00023 |       0.00592 |       0.91639
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.15         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 3223          |
| TimeElapsed     | 1.07e+04      |
| TimestepsSoFar  | 7639040       |
| ev_tdlam_before | -2.2          |
| loss_ent        | 0.91639227    |
| loss_kl         | 0.005922403   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.015536572  |
| loss_vf_loss    | 0.00023111801 |
-----------------------------------
********** Iteration 1865 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00023 |       0.00000 | 

********** Iteration 1870 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00099 |       0.00000 |       0.00273 |       0.00154 |       0.92302
     -0.00659 |       0.00000 |       0.00210 |       0.00458 |       0.92522
     -0.00896 |       0.00000 |       0.00199 |       0.00410 |       0.92004
     -0.00943 |       0.00000 |       0.00196 |       0.00412 |       0.92167
     -0.01027 |       0.00000 |       0.00191 |       0.00454 |       0.92160
     -0.01120 |       0.00000 |       0.00189 |       0.00498 |       0.92216
     -0.01161 |       0.00000 |       0.00187 |       0.00497 |       0.92277
     -0.01242 |       0.00000 |       0.00190 |       0.00527 |       0.92392
     -0.01249 |       0.00000 |       0.00186 |       0.00594 |       0.92470
     -0.01311 |       0.00000 |       0.00185 |       0.00597 |       0.92333
Evaluating losses...
     -0.01395 |       0.00000 |       0.00188 |       0.00628 |     

     -0.01063 |       0.00000 |       0.00117 |       0.00528 |       0.90389
     -0.01061 |       0.00000 |       0.00112 |       0.00532 |       0.90332
     -0.01130 |       0.00000 |       0.00109 |       0.00537 |       0.90175
     -0.01076 |       0.00000 |       0.00106 |       0.00596 |       0.90106
Evaluating losses...
     -0.01208 |       0.00000 |       0.00099 |       0.00559 |       0.90043
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.13         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3238          |
| TimeElapsed     | 1.07e+04      |
| TimestepsSoFar  | 7684096       |
| ev_tdlam_before | -0.242        |
| loss_ent        | 0.9004254     |
| loss_kl         | 0.005587896   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0120795565 |
| loss_vf_loss    | 0.0009944491  |
-----------------------------------
********** Iteration 1876 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 1881 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00018 |       0.00000 |       0.00363 |       0.00221 |       0.91198
     -0.00535 |       0.00000 |       0.00238 |       0.00318 |       0.90664
     -0.00663 |       0.00000 |       0.00217 |       0.00376 |       0.90492
     -0.00790 |       0.00000 |       0.00198 |       0.00368 |       0.90550
     -0.00855 |       0.00000 |       0.00185 |       0.00447 |       0.90484
     -0.00878 |       0.00000 |       0.00174 |       0.00475 |       0.90564
     -0.00948 |       0.00000 |       0.00159 |       0.00502 |       0.90448
     -0.00946 |       0.00000 |       0.00152 |       0.00541 |       0.90410
     -0.00998 |       0.00000 |       0.00147 |       0.00630 |       0.90370
     -0.01046 |       0.00000 |       0.00142 |       0.00607 |       0.90745
Evaluating losses...
     -0.01093 |       0.00000 |       0.00133 |       0.00611 |     

     -0.01317 |       0.00000 |       0.00061 |       0.00508 |       0.91551
     -0.01343 |       0.00000 |       0.00058 |       0.00519 |       0.91559
Evaluating losses...
     -0.01454 |       0.00000 |       0.00055 |       0.00512 |       0.91555
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.15         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3253          |
| TimeElapsed     | 1.08e+04      |
| TimestepsSoFar  | 7729152       |
| ev_tdlam_before | 0.757         |
| loss_ent        | 0.91554755    |
| loss_kl         | 0.005119606   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.014541483  |
| loss_vf_loss    | 0.00054757233 |
-----------------------------------
********** Iteration 1887 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00054 |       0.00000 |       0.00267 |       0.00067 |       0.90021
     -0.00454 |       0.00000 | 

********** Iteration 1892 ************
Eval num_timesteps=7749632, episode_reward=0.10 +/- 0.90
Episode length: 3000.00 +/- 0.00
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00122 |       0.00000 |       0.00166 |       0.00135 |       0.89650
     -0.00532 |       0.00000 |       0.00092 |       0.00360 |       0.89663
     -0.00877 |       0.00000 |       0.00078 |       0.00337 |       0.89563
     -0.00989 |       0.00000 |       0.00071 |       0.00355 |       0.89529
     -0.01053 |       0.00000 |       0.00068 |       0.00419 |       0.89418
     -0.01122 |       0.00000 |       0.00064 |       0.00457 |       0.89386
     -0.01148 |       0.00000 |       0.00062 |       0.00485 |       0.89393
     -0.01282 |       0.00000 |       0.00061 |       0.00518 |       0.89398
     -0.01323 |       0.00000 |       0.00058 |       0.00543 |       0.89505
     -0.01326 |       0.00000 |       0.00058 |       0.00565 |       0.89488

     -0.01132 |       0.00000 |       0.00153 |       0.00501 |       0.92293
     -0.01194 |       0.00000 |       0.00145 |       0.00511 |       0.92344
     -0.01232 |       0.00000 |       0.00142 |       0.00514 |       0.92290
     -0.01229 |       0.00000 |       0.00139 |       0.00588 |       0.92246
Evaluating losses...
     -0.01406 |       0.00000 |       0.00137 |       0.00562 |       0.91950
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.13        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 3268         |
| TimeElapsed     | 1.1e+04      |
| TimestepsSoFar  | 7774208      |
| ev_tdlam_before | 0.654        |
| loss_ent        | 0.91950476   |
| loss_kl         | 0.0056230496 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01405591  |
| loss_vf_loss    | 0.001374944  |
----------------------------------
********** Iteration 1898 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 1903 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00065 |       0.00000 |       0.00363 |       0.00112 |       0.86559
     -0.00635 |       0.00000 |       0.00300 |       0.00215 |       0.86802
     -0.00796 |       0.00000 |       0.00261 |       0.00288 |       0.87012
     -0.00993 |       0.00000 |       0.00245 |       0.00364 |       0.87176
     -0.01073 |       0.00000 |       0.00232 |       0.00386 |       0.87016
     -0.01197 |       0.00000 |       0.00224 |       0.00407 |       0.87142
     -0.01200 |       0.00000 |       0.00218 |       0.00439 |       0.87291
     -0.01297 |       0.00000 |       0.00216 |       0.00475 |       0.87052
     -0.01334 |       0.00000 |       0.00210 |       0.00511 |       0.87189
     -0.01384 |       0.00000 |       0.00204 |       0.00512 |       0.87186
Evaluating losses...
     -0.01481 |       0.00000 |       0.00198 |       0.00549 |     

     -0.01148 |       0.00000 |       0.00143 |       0.00608 |       0.88995
     -0.01185 |       0.00000 |       0.00142 |       0.00699 |       0.89042
Evaluating losses...
     -0.01285 |       0.00000 |       0.00136 |       0.00680 |       0.89036
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.21        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 3283         |
| TimeElapsed     | 1.1e+04      |
| TimestepsSoFar  | 7819264      |
| ev_tdlam_before | 0.515        |
| loss_ent        | 0.89036036   |
| loss_kl         | 0.0068041724 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012854901 |
| loss_vf_loss    | 0.0013560486 |
----------------------------------
********** Iteration 1909 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00265 |       0.00000 |       0.00177 |       0.00125 |       0.88600
     -0.00198 |       0.00000 |       0.00130 

********** Iteration 1914 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00117 |       0.00000 |       0.00213 |       0.00172 |       0.92455
     -0.00565 |       0.00000 |       0.00149 |       0.00305 |       0.92163
     -0.00708 |       0.00000 |       0.00126 |       0.00358 |       0.92408
     -0.00934 |       0.00000 |       0.00110 |       0.00423 |       0.92352
     -0.00906 |       0.00000 |       0.00091 |       0.00480 |       0.92392
     -0.01051 |       0.00000 |       0.00075 |       0.00526 |       0.92353
     -0.01159 |       0.00000 |       0.00068 |       0.00543 |       0.92560
     -0.01191 |       0.00000 |       0.00064 |       0.00545 |       0.92617
     -0.01242 |       0.00000 |       0.00061 |       0.00667 |       0.92708
     -0.01279 |       0.00000 |       0.00056 |       0.00612 |       0.92635
Evaluating losses...
     -0.01382 |       0.00000 |       0.00052 |       0.00599 |     

     -0.01152 |       0.00000 |       0.00210 |       0.00493 |       0.88809
     -0.01187 |       0.00000 |       0.00205 |       0.00522 |       0.88844
     -0.01256 |       0.00000 |       0.00203 |       0.00546 |       0.88814
Evaluating losses...
     -0.01311 |       0.00000 |       0.00195 |       0.00569 |       0.88929
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.29        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 3298         |
| TimeElapsed     | 1.11e+04     |
| TimestepsSoFar  | 7864320      |
| ev_tdlam_before | 0.232        |
| loss_ent        | 0.88928777   |
| loss_kl         | 0.005688204  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013107208 |
| loss_vf_loss    | 0.0019462175 |
----------------------------------
********** Iteration 1920 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00207 |       0.00000 |       0.00149 

********** Iteration 1925 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00048 |       0.00000 |       0.00354 |       0.00388 |       0.91298
     -0.00499 |       0.00000 |       0.00323 |       0.00430 |       0.90894
     -0.00716 |       0.00000 |       0.00305 |       0.00317 |       0.90977
     -0.00960 |       0.00000 |       0.00288 |       0.00307 |       0.90995
     -0.00937 |       0.00000 |       0.00280 |       0.00434 |       0.91057
     -0.01099 |       0.00000 |       0.00274 |       0.00489 |       0.90931
     -0.01170 |       0.00000 |       0.00266 |       0.00439 |       0.90846
     -0.01208 |       0.00000 |       0.00267 |       0.00465 |       0.90970
     -0.01230 |       0.00000 |       0.00262 |       0.00515 |       0.90902
     -0.01281 |       0.00000 |       0.00258 |       0.00574 |       0.90650
Evaluating losses...
     -0.01367 |       0.00000 |       0.00250 |       0.00523 |     

     -0.01709 |       0.00000 |      5.06e-05 |       0.00555 |       0.91702
     -0.01782 |       0.00000 |      4.92e-05 |       0.00565 |       0.91956
Evaluating losses...
     -0.01940 |       0.00000 |      4.80e-05 |       0.00577 |       0.91583
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.24         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 3313          |
| TimeElapsed     | 1.11e+04      |
| TimestepsSoFar  | 7909376       |
| ev_tdlam_before | -0.487        |
| loss_ent        | 0.9158268     |
| loss_kl         | 0.0057717636  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.019400079  |
| loss_vf_loss    | 4.8005524e-05 |
-----------------------------------
********** Iteration 1931 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00215 |       0.00000 |       0.00016 |       0.00102 |       0.91914
     -0.00371 |       0.00000 | 

********** Iteration 1936 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00165 |       0.00000 |       0.00044 |       0.00075 |       0.89536
     -0.00388 |       0.00000 |       0.00024 |       0.00283 |       0.89707
     -0.00802 |       0.00000 |       0.00018 |       0.00311 |       0.89739
     -0.00890 |       0.00000 |       0.00015 |       0.00340 |       0.89610
     -0.01006 |       0.00000 |       0.00013 |       0.00370 |       0.89618
     -0.01141 |       0.00000 |       0.00012 |       0.00419 |       0.89533
     -0.01248 |       0.00000 |       0.00011 |       0.00474 |       0.89594
     -0.01122 |       0.00000 |       0.00010 |       0.00548 |       0.89993
     -0.01368 |       0.00000 |      9.58e-05 |       0.00507 |       0.89645
     -0.01277 |       0.00000 |      9.11e-05 |       0.00624 |       0.89510
Evaluating losses...
     -0.01448 |       0.00000 |      8.84e-05 |       0.00669 |     

     -0.01409 |       0.00000 |       0.00017 |       0.00785 |       0.92169
     -0.01394 |       0.00000 |       0.00017 |       0.00791 |       0.92353
Evaluating losses...
     -0.01668 |       0.00000 |       0.00016 |       0.00677 |       0.92464
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.26         |
| EpThisIter      | 2             |
| EpisodesSoFar   | 3328          |
| TimeElapsed     | 1.11e+04      |
| TimestepsSoFar  | 7954432       |
| ev_tdlam_before | -1.2          |
| loss_ent        | 0.9246381     |
| loss_kl         | 0.006766665   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0166816    |
| loss_vf_loss    | 0.00015848651 |
-----------------------------------
********** Iteration 1942 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00292 |       0.00000 |       0.00011 |       0.00156 |       0.90625
     -0.00497 |       0.00000 | 

********** Iteration 1947 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00653 |       0.00000 |       0.00181 |       0.00528 |       0.91967
     -0.00272 |       0.00000 |       0.00150 |       0.00537 |       0.91802
     -0.00739 |       0.00000 |       0.00146 |       0.00314 |       0.91644
     -0.00859 |       0.00000 |       0.00141 |       0.00346 |       0.91471
     -0.01014 |       0.00000 |       0.00138 |       0.00323 |       0.91570
     -0.01038 |       0.00000 |       0.00131 |       0.00395 |       0.91550
     -0.01058 |       0.00000 |       0.00125 |       0.00547 |       0.91470
     -0.01061 |       0.00000 |       0.00121 |       0.00463 |       0.91486
     -0.01212 |       0.00000 |       0.00113 |       0.00452 |       0.91606
     -0.01232 |       0.00000 |       0.00109 |       0.00430 |       0.91573
Evaluating losses...
     -0.01333 |       0.00000 |       0.00102 |       0.00451 |     

     -0.01208 |       0.00000 |      4.53e-05 |       0.00489 |       0.89118
     -0.01259 |       0.00000 |      4.19e-05 |       0.00496 |       0.89192
Evaluating losses...
     -0.01382 |       0.00000 |      4.33e-05 |       0.00488 |       0.89181
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.25        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 3343         |
| TimeElapsed     | 1.12e+04     |
| TimestepsSoFar  | 7999488      |
| ev_tdlam_before | -3.86        |
| loss_ent        | 0.89180523   |
| loss_kl         | 0.004883491  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013824626 |
| loss_vf_loss    | 4.326847e-05 |
----------------------------------
********** Iteration 1953 ************
Eval num_timesteps=7999488, episode_reward=0.32 +/- 1.00
Episode length: 2997.56 +/- 24.28
New best mean reward!
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.000

********** Iteration 1958 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00088 |       0.00000 |       0.00012 |       0.00130 |       0.89574
     -0.00370 |       0.00000 |      6.44e-05 |       0.00242 |       0.89516
     -0.00560 |       0.00000 |      4.46e-05 |       0.00255 |       0.89585
     -0.00660 |       0.00000 |      3.53e-05 |       0.00266 |       0.89600
     -0.00720 |       0.00000 |      2.85e-05 |       0.00337 |       0.89446
     -0.00781 |       0.00000 |      2.60e-05 |       0.00349 |       0.89463
     -0.00819 |       0.00000 |      2.28e-05 |       0.00406 |       0.89540
     -0.00872 |       0.00000 |      2.24e-05 |       0.00403 |       0.89478
     -0.00890 |       0.00000 |      1.90e-05 |       0.00428 |       0.89565
     -0.00904 |       0.00000 |      1.78e-05 |       0.00483 |       0.89609
Evaluating losses...
     -0.00969 |       0.00000 |      1.66e-05 |       0.00456 |     

     -0.01308 |       0.00000 |       0.00238 |       0.00573 |       0.89465
     -0.01302 |       0.00000 |       0.00235 |       0.00566 |       0.89152
     -0.01354 |       0.00000 |       0.00233 |       0.00642 |       0.89275
Evaluating losses...
     -0.01495 |       0.00000 |       0.00227 |       0.00604 |       0.89236
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.23        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3357         |
| TimeElapsed     | 1.14e+04     |
| TimestepsSoFar  | 8044544      |
| ev_tdlam_before | 0.538        |
| loss_ent        | 0.8923616    |
| loss_kl         | 0.006042622  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.014951845 |
| loss_vf_loss    | 0.0022717307 |
----------------------------------
********** Iteration 1964 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00146 |       0.00000 |       0.00121 

********** Iteration 1969 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00119 |       0.00000 |       0.00369 |       0.00496 |       0.86741
     -0.00635 |       0.00000 |       0.00322 |       0.00743 |       0.86838
     -0.00766 |       0.00000 |       0.00290 |       0.00485 |       0.86944
     -0.00819 |       0.00000 |       0.00266 |       0.00432 |       0.86735
     -0.00924 |       0.00000 |       0.00250 |       0.00588 |       0.86688
     -0.00932 |       0.00000 |       0.00239 |       0.00789 |       0.87157
     -0.00930 |       0.00000 |       0.00233 |       0.01233 |       0.87155
     -0.01028 |       0.00000 |       0.00224 |       0.00962 |       0.86989
     -0.01053 |       0.00000 |       0.00221 |       0.00889 |       0.86877
     -0.01056 |       0.00000 |       0.00218 |       0.00851 |       0.86891
Evaluating losses...
     -0.01141 |       0.00000 |       0.00209 |       0.00767 |     

     -0.01033 |       0.00000 |       0.00109 |       0.00374 |       0.87541
     -0.01031 |       0.00000 |       0.00106 |       0.00426 |       0.87705
Evaluating losses...
     -0.01082 |       0.00000 |       0.00101 |       0.00460 |       0.87604
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.26        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3372         |
| TimeElapsed     | 1.14e+04     |
| TimestepsSoFar  | 8089600      |
| ev_tdlam_before | 0.408        |
| loss_ent        | 0.8760387    |
| loss_kl         | 0.0045996285 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010823641 |
| loss_vf_loss    | 0.0010129442 |
----------------------------------
********** Iteration 1975 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00092 |       0.00000 |       0.00268 |       0.00142 |       0.89250
     -0.00627 |       0.00000 |       0.00198 

********** Iteration 1980 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00204 |       0.00000 |       0.00174 |       0.00545 |       0.89710
      0.00059 |       0.00000 |       0.00145 |       0.01093 |       0.90134
     -0.00440 |       0.00000 |       0.00142 |       0.00850 |       0.90394
     -0.00630 |       0.00000 |       0.00136 |       0.00621 |       0.90135
     -0.00693 |       0.00000 |       0.00137 |       0.00507 |       0.89997
     -0.00795 |       0.00000 |       0.00132 |       0.00492 |       0.90064
     -0.00863 |       0.00000 |       0.00130 |       0.00443 |       0.90002
     -0.00898 |       0.00000 |       0.00129 |       0.00479 |       0.90051
     -0.00817 |       0.00000 |       0.00127 |       0.00645 |       0.89905
     -0.00871 |       0.00000 |       0.00127 |       0.00852 |       0.90139
Evaluating losses...
     -0.00961 |       0.00000 |       0.00120 |       0.00663 |     

     -0.00734 |       0.00000 |       0.00023 |       0.00334 |       0.88322
     -0.00774 |       0.00000 |       0.00023 |       0.00341 |       0.88478
     -0.00791 |       0.00000 |       0.00023 |       0.00332 |       0.88361
Evaluating losses...
     -0.00826 |       0.00000 |       0.00021 |       0.00377 |       0.88142
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.23         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3387          |
| TimeElapsed     | 1.14e+04      |
| TimestepsSoFar  | 8134656       |
| ev_tdlam_before | 0.437         |
| loss_ent        | 0.8814243     |
| loss_kl         | 0.0037656655  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.008260465  |
| loss_vf_loss    | 0.00021341247 |
-----------------------------------
********** Iteration 1986 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00049 |       0.00000 | 

********** Iteration 1991 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00029 |       0.00000 |       0.00469 |       0.00403 |       0.86789
     -0.00630 |       0.00000 |       0.00418 |       0.00724 |       0.86551
     -0.00878 |       0.00000 |       0.00396 |       0.01124 |       0.86589
     -0.01039 |       0.00000 |       0.00386 |       0.00719 |       0.86382
     -0.01169 |       0.00000 |       0.00381 |       0.00758 |       0.86298
     -0.01216 |       0.00000 |       0.00369 |       0.00742 |       0.86214
     -0.01313 |       0.00000 |       0.00368 |       0.00815 |       0.86281
     -0.01323 |       0.00000 |       0.00351 |       0.00867 |       0.86462
     -0.01343 |       0.00000 |       0.00340 |       0.00975 |       0.86355
     -0.01418 |       0.00000 |       0.00338 |       0.00961 |       0.86301
Evaluating losses...
     -0.01495 |       0.00000 |       0.00326 |       0.00854 |     

     -0.01284 |       0.00000 |      6.96e-05 |       0.00463 |       0.90798
     -0.01329 |       0.00000 |      6.92e-05 |       0.00483 |       0.90833
Evaluating losses...
     -0.01324 |       0.00000 |      6.75e-05 |       0.00595 |       0.90688
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.14        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3402         |
| TimeElapsed     | 1.15e+04     |
| TimestepsSoFar  | 8179712      |
| ev_tdlam_before | -0.369       |
| loss_ent        | 0.9068762    |
| loss_kl         | 0.0059497785 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013244869 |
| loss_vf_loss    | 6.753126e-05 |
----------------------------------
********** Iteration 1997 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00147 |       0.00000 |      4.83e-05 |       0.00152 |       0.88598
     -0.00241 |       0.00000 |      2.77e-05 

********** Iteration 2002 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00109 |       0.00000 |       0.00064 |       0.00162 |       0.85262
     -0.00415 |       0.00000 |       0.00025 |       0.00302 |       0.85214
     -0.00561 |       0.00000 |       0.00022 |       0.00442 |       0.85471
     -0.00741 |       0.00000 |       0.00021 |       0.00483 |       0.85533
     -0.00873 |       0.00000 |       0.00020 |       0.00459 |       0.85390
     -0.01001 |       0.00000 |       0.00019 |       0.00435 |       0.85515
     -0.01027 |       0.00000 |       0.00019 |       0.00472 |       0.85500
     -0.01130 |       0.00000 |       0.00018 |       0.00531 |       0.85641
     -0.01117 |       0.00000 |       0.00018 |       0.00523 |       0.85492
     -0.01217 |       0.00000 |       0.00018 |       0.00545 |       0.85578
Evaluating losses...
     -0.01315 |       0.00000 |       0.00017 |       0.00528 |     

     -0.01037 |       0.00000 |      4.65e-05 |       0.00462 |       0.86053
     -0.01025 |       0.00000 |      4.46e-05 |       0.00452 |       0.86245
     -0.01108 |       0.00000 |      4.38e-05 |       0.00515 |       0.86385
Evaluating losses...
     -0.01253 |       0.00000 |      4.19e-05 |       0.00514 |       0.86149
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.16         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3417          |
| TimeElapsed     | 1.15e+04      |
| TimestepsSoFar  | 8224768       |
| ev_tdlam_before | -1.23         |
| loss_ent        | 0.8614897     |
| loss_kl         | 0.005135481   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.012525675  |
| loss_vf_loss    | 4.1937015e-05 |
-----------------------------------
********** Iteration 2008 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00011 |       0.00000 | 

********** Iteration 2013 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00191 |       0.00000 |       0.00167 |       0.00677 |       0.82197
     -0.00391 |       0.00000 |       0.00163 |       0.01051 |       0.82854
     -0.00586 |       0.00000 |       0.00164 |       0.00530 |       0.82275
     -0.00584 |       0.00000 |       0.00162 |       0.00699 |       0.82477
     -0.00636 |       0.00000 |       0.00161 |       0.00902 |       0.82654
     -0.00727 |       0.00000 |       0.00160 |       0.00547 |       0.82168
     -0.00793 |       0.00000 |       0.00159 |       0.00367 |       0.81810
     -0.00799 |       0.00000 |       0.00157 |       0.00501 |       0.82109
     -0.00832 |       0.00000 |       0.00157 |       0.00397 |       0.81704
     -0.00837 |       0.00000 |       0.00156 |       0.00744 |       0.82289
Evaluating losses...
     -0.00752 |       0.00000 |       0.00148 |       0.01396 |     

     -0.00994 |       0.00000 |       0.00300 |       0.00430 |       0.80521
     -0.01060 |       0.00000 |       0.00285 |       0.00455 |       0.80714
     -0.01097 |       0.00000 |       0.00277 |       0.00502 |       0.80762
     -0.01182 |       0.00000 |       0.00266 |       0.00475 |       0.80875
Evaluating losses...
     -0.01228 |       0.00000 |       0.00253 |       0.00568 |       0.80753
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.09        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3432         |
| TimeElapsed     | 1.17e+04     |
| TimestepsSoFar  | 8269824      |
| ev_tdlam_before | 0.516        |
| loss_ent        | 0.8075295    |
| loss_kl         | 0.0056818435 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012276735 |
| loss_vf_loss    | 0.0025334284 |
----------------------------------
********** Iteration 2019 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 2024 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00056 |       0.00000 |       0.00847 |       0.00170 |       0.84137
     -0.00579 |       0.00000 |       0.00697 |       0.00453 |       0.84352
     -0.00821 |       0.00000 |       0.00626 |       0.00367 |       0.84462
     -0.00942 |       0.00000 |       0.00601 |       0.00554 |       0.84488
     -0.01002 |       0.00000 |       0.00572 |       0.00763 |       0.84613
     -0.01117 |       0.00000 |       0.00555 |       0.00575 |       0.84513
     -0.01185 |       0.00000 |       0.00534 |       0.00671 |       0.84614
     -0.01235 |       0.00000 |       0.00517 |       0.00720 |       0.84684
     -0.01305 |       0.00000 |       0.00506 |       0.00721 |       0.84563
     -0.01369 |       0.00000 |       0.00498 |       0.00762 |       0.84769
Evaluating losses...
     -0.01434 |       0.00000 |       0.00482 |       0.00758 |     

     -0.00817 |       0.00000 |       0.00074 |       0.00427 |       0.91190
     -0.00929 |       0.00000 |       0.00073 |       0.00488 |       0.91089
     -0.00983 |       0.00000 |       0.00072 |       0.00512 |       0.90948
Evaluating losses...
     -0.01017 |       0.00000 |       0.00069 |       0.00518 |       0.91028
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.07        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3447         |
| TimeElapsed     | 1.18e+04     |
| TimestepsSoFar  | 8314880      |
| ev_tdlam_before | 0.613        |
| loss_ent        | 0.91028357   |
| loss_kl         | 0.005175774  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010174677 |
| loss_vf_loss    | 0.0006946298 |
----------------------------------
********** Iteration 2030 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00209 |       0.00000 |       0.00123 

********** Iteration 2035 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00043 |       0.00000 |       0.00137 |       0.00087 |       0.86572
     -0.00304 |       0.00000 |       0.00084 |       0.00147 |       0.86325
     -0.00456 |       0.00000 |       0.00077 |       0.00220 |       0.86076
     -0.00609 |       0.00000 |       0.00073 |       0.00236 |       0.85912
     -0.00676 |       0.00000 |       0.00068 |       0.00293 |       0.86000
     -0.00791 |       0.00000 |       0.00069 |       0.00333 |       0.86041
     -0.00833 |       0.00000 |       0.00064 |       0.00371 |       0.85976
     -0.00831 |       0.00000 |       0.00064 |       0.00398 |       0.85878
     -0.00924 |       0.00000 |       0.00062 |       0.00443 |       0.85978
     -0.00952 |       0.00000 |       0.00059 |       0.00474 |       0.86028
Evaluating losses...
     -0.01043 |       0.00000 |       0.00056 |       0.00531 |     

     -0.01113 |       0.00000 |       0.00108 |       0.00495 |       0.87971
     -0.01164 |       0.00000 |       0.00106 |       0.00500 |       0.88061
Evaluating losses...
     -0.01264 |       0.00000 |       0.00105 |       0.00515 |       0.88184
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0            |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3462         |
| TimeElapsed     | 1.18e+04     |
| TimestepsSoFar  | 8359936      |
| ev_tdlam_before | 0.623        |
| loss_ent        | 0.88183534   |
| loss_kl         | 0.0051487517 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012642307 |
| loss_vf_loss    | 0.0010532389 |
----------------------------------
********** Iteration 2041 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00201 |       0.00000 |       0.00115 |       0.00112 |       0.82926
     -0.00298 |       0.00000 |       0.00094 

********** Iteration 2046 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00109 |       0.00000 |       0.00223 |       0.00372 |       0.85636
     -0.00324 |       0.00000 |       0.00190 |       0.00273 |       0.85824
     -0.00483 |       0.00000 |       0.00173 |       0.00389 |       0.85421
     -0.00654 |       0.00000 |       0.00165 |       0.00389 |       0.85789
     -0.00664 |       0.00000 |       0.00156 |       0.00492 |       0.85577
     -0.00680 |       0.00000 |       0.00156 |       0.00418 |       0.85722
     -0.00735 |       0.00000 |       0.00155 |       0.00446 |       0.85832
     -0.00796 |       0.00000 |       0.00149 |       0.00452 |       0.85681
     -0.00858 |       0.00000 |       0.00148 |       0.00392 |       0.85990
     -0.00882 |       0.00000 |       0.00144 |       0.00424 |       0.86049
Evaluating losses...
     -0.00943 |       0.00000 |       0.00149 |       0.00495 |     

     -0.01022 |       0.00000 |      4.03e-05 |       0.00300 |       0.85072
     -0.01088 |       0.00000 |      3.98e-05 |       0.00319 |       0.85065
     -0.01146 |       0.00000 |      3.76e-05 |       0.00342 |       0.85067
Evaluating losses...
     -0.01247 |       0.00000 |      3.79e-05 |       0.00348 |       0.85170
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.01         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3477         |
| TimeElapsed     | 1.19e+04     |
| TimestepsSoFar  | 8404992      |
| ev_tdlam_before | -1.03        |
| loss_ent        | 0.8516973    |
| loss_kl         | 0.0034842025 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012467161 |
| loss_vf_loss    | 3.785202e-05 |
----------------------------------
********** Iteration 2052 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00015 |       0.00000 |       0.00524 

********** Iteration 2057 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00243 |       0.00000 |       0.00489 |       0.00628 |       0.81173
     -0.00357 |       0.00000 |       0.00429 |       0.00656 |       0.81465
     -0.00564 |       0.00000 |       0.00401 |       0.00477 |       0.81180
     -0.00703 |       0.00000 |       0.00386 |       0.00457 |       0.81556
     -0.00816 |       0.00000 |       0.00374 |       0.00466 |       0.81698
     -0.00827 |       0.00000 |       0.00366 |       0.00532 |       0.81700
     -0.00877 |       0.00000 |       0.00362 |       0.00493 |       0.81730
     -0.00894 |       0.00000 |       0.00359 |       0.00506 |       0.81821
     -0.01020 |       0.00000 |       0.00359 |       0.00496 |       0.81834
     -0.01012 |       0.00000 |       0.00353 |       0.00531 |       0.81815
Evaluating losses...
     -0.01129 |       0.00000 |       0.00342 |       0.00570 |     

     -0.01503 |       0.00000 |      5.41e-05 |       0.00468 |       0.84901
     -0.01644 |       0.00000 |      5.08e-05 |       0.00496 |       0.84949
Evaluating losses...
     -0.01830 |       0.00000 |      4.97e-05 |       0.00507 |       0.84980
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.16         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3492         |
| TimeElapsed     | 1.19e+04     |
| TimestepsSoFar  | 8450048      |
| ev_tdlam_before | -0.407       |
| loss_ent        | 0.8498       |
| loss_kl         | 0.005073181  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.01829546  |
| loss_vf_loss    | 4.974985e-05 |
----------------------------------
********** Iteration 2063 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00066 |       0.00000 |       0.00245 |       0.00214 |       0.84426
     -0.00443 |       0.00000 |       0.00205 

********** Iteration 2068 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00061 |       0.00000 |       0.00028 |       0.00104 |       0.86951
     -0.00712 |       0.00000 |       0.00016 |       0.00186 |       0.86681
     -0.00846 |       0.00000 |       0.00013 |       0.00251 |       0.86734
     -0.00986 |       0.00000 |       0.00012 |       0.00255 |       0.86621
     -0.01128 |       0.00000 |       0.00011 |       0.00281 |       0.86524
     -0.01199 |       0.00000 |       0.00010 |       0.00320 |       0.86511
     -0.01235 |       0.00000 |      9.98e-05 |       0.00354 |       0.86425
     -0.01295 |       0.00000 |      9.55e-05 |       0.00380 |       0.86386
     -0.01360 |       0.00000 |      9.33e-05 |       0.00418 |       0.86538
     -0.01415 |       0.00000 |      9.11e-05 |       0.00452 |       0.86461
Evaluating losses...
     -0.01534 |       0.00000 |      8.91e-05 |       0.00486 |     

     -0.01059 |       0.00000 |       0.00038 |       0.00433 |       0.85931
     -0.01082 |       0.00000 |       0.00037 |       0.00448 |       0.85843
Evaluating losses...
     -0.01149 |       0.00000 |       0.00035 |       0.00447 |       0.85858
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.14         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3507         |
| TimeElapsed     | 1.2e+04      |
| TimestepsSoFar  | 8495104      |
| ev_tdlam_before | -0.661       |
| loss_ent        | 0.85858357   |
| loss_kl         | 0.0044738334 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.011487756 |
| loss_vf_loss    | 0.0003517296 |
----------------------------------
********** Iteration 2074 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00079 |       0.00000 |       0.00148 |       0.00124 |       0.88730
     -0.00350 |       0.00000 |       0.00099 

********** Iteration 2079 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00078 |       0.00000 |       0.00012 |       0.00215 |       0.86796
     -0.00661 |       0.00000 |      7.82e-05 |       0.00286 |       0.86632
     -0.00794 |       0.00000 |      6.09e-05 |       0.00334 |       0.86500
     -0.00975 |       0.00000 |      5.32e-05 |       0.00448 |       0.86548
     -0.01113 |       0.00000 |      4.74e-05 |       0.00409 |       0.86559
     -0.01114 |       0.00000 |      4.34e-05 |       0.00518 |       0.86525
     -0.01258 |       0.00000 |      4.05e-05 |       0.00476 |       0.86521
     -0.01291 |       0.00000 |      3.95e-05 |       0.00534 |       0.86464
     -0.01362 |       0.00000 |      3.80e-05 |       0.00555 |       0.86484
     -0.01425 |       0.00000 |      3.56e-05 |       0.00599 |       0.86505
Evaluating losses...
     -0.01429 |       0.00000 |      3.51e-05 |       0.00701 |     

     -0.01312 |       0.00000 |      1.02e-05 |       0.00467 |       0.88424
     -0.01383 |       0.00000 |      9.56e-06 |       0.00515 |       0.88474
     -0.01393 |       0.00000 |      9.35e-06 |       0.00571 |       0.88440
Evaluating losses...
     -0.01540 |       0.00000 |      1.07e-05 |       0.00589 |       0.88392
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.12          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3522          |
| TimeElapsed     | 1.22e+04      |
| TimestepsSoFar  | 8540160       |
| ev_tdlam_before | -1.94         |
| loss_ent        | 0.88391966    |
| loss_kl         | 0.005891206   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.015404821  |
| loss_vf_loss    | 1.0669344e-05 |
-----------------------------------
********** Iteration 2085 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -6.58e-05 |       0.00000 | 

********** Iteration 2090 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00226 |       0.00000 |       0.00247 |       0.00067 |       0.82929
     -0.00343 |       0.00000 |       0.00183 |       0.00164 |       0.82866
     -0.00526 |       0.00000 |       0.00175 |       0.00205 |       0.82960
     -0.00607 |       0.00000 |       0.00164 |       0.00234 |       0.82795
     -0.00769 |       0.00000 |       0.00160 |       0.00287 |       0.82703
     -0.00798 |       0.00000 |       0.00152 |       0.00308 |       0.82784
     -0.00907 |       0.00000 |       0.00150 |       0.00304 |       0.82668
     -0.00991 |       0.00000 |       0.00147 |       0.00331 |       0.82772
     -0.01082 |       0.00000 |       0.00146 |       0.00362 |       0.82769
     -0.01075 |       0.00000 |       0.00146 |       0.00371 |       0.82749
Evaluating losses...
     -0.01187 |       0.00000 |       0.00142 |       0.00360 |     

     -0.01235 |       0.00000 |      1.86e-05 |       0.00384 |       0.88688
     -0.01254 |       0.00000 |      1.78e-05 |       0.00454 |       0.88696
     -0.01328 |       0.00000 |      1.73e-05 |       0.00447 |       0.88804
Evaluating losses...
     -0.01482 |       0.00000 |      1.67e-05 |       0.00418 |       0.88748
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.15          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3537          |
| TimeElapsed     | 1.22e+04      |
| TimestepsSoFar  | 8585216       |
| ev_tdlam_before | -0.421        |
| loss_ent        | 0.8874833     |
| loss_kl         | 0.004182644   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.014821047  |
| loss_vf_loss    | 1.6729304e-05 |
-----------------------------------
********** Iteration 2096 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00056 |       0.00000 | 

********** Iteration 2101 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00362 |       0.00000 |       0.00034 |       0.00267 |       0.81755
     -0.00390 |       0.00000 |       0.00022 |       0.00575 |       0.81587
     -0.00699 |       0.00000 |       0.00017 |       0.00447 |       0.81813
     -0.00673 |       0.00000 |       0.00014 |       0.00573 |       0.81821
     -0.00922 |       0.00000 |       0.00012 |       0.00490 |       0.81578
     -0.01026 |       0.00000 |       0.00011 |       0.00479 |       0.81976
     -0.01178 |       0.00000 |      9.43e-05 |       0.00485 |       0.81824
     -0.01154 |       0.00000 |      8.53e-05 |       0.00563 |       0.81870
     -0.01177 |       0.00000 |      7.78e-05 |       0.00666 |       0.82024
     -0.01201 |       0.00000 |      7.35e-05 |       0.00623 |       0.81948
Evaluating losses...
     -0.01371 |       0.00000 |      6.81e-05 |       0.00652 |     

     -0.01160 |       0.00000 |      6.61e-05 |       0.00366 |       0.80382
     -0.01257 |       0.00000 |      6.51e-05 |       0.00384 |       0.80251
     -0.01323 |       0.00000 |      6.11e-05 |       0.00411 |       0.80345
Evaluating losses...
     -0.01407 |       0.00000 |      6.01e-05 |       0.00492 |       0.80392
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.1           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3552          |
| TimeElapsed     | 1.23e+04      |
| TimestepsSoFar  | 8630272       |
| ev_tdlam_before | -0.858        |
| loss_ent        | 0.80391645    |
| loss_kl         | 0.0049205204  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.014068139  |
| loss_vf_loss    | 6.0132406e-05 |
-----------------------------------
********** Iteration 2107 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00207 |       0.00000 | 

********** Iteration 2112 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00019 |       0.00000 |       0.00030 |       0.00120 |       0.84472
     -0.00607 |       0.00000 |       0.00020 |       0.00322 |       0.84682
     -0.00825 |       0.00000 |       0.00017 |       0.00340 |       0.84849
     -0.01011 |       0.00000 |       0.00015 |       0.00374 |       0.85021
     -0.01094 |       0.00000 |       0.00014 |       0.00439 |       0.84990
     -0.01205 |       0.00000 |       0.00013 |       0.00435 |       0.84917
     -0.01278 |       0.00000 |       0.00013 |       0.00502 |       0.85057
     -0.01376 |       0.00000 |       0.00012 |       0.00543 |       0.85090
     -0.01430 |       0.00000 |       0.00011 |       0.00548 |       0.85085
     -0.01473 |       0.00000 |       0.00011 |       0.00614 |       0.85033
Evaluating losses...
     -0.01629 |       0.00000 |       0.00011 |       0.00616 |     

     -0.00695 |       0.00000 |       0.00039 |       0.00273 |       0.83829
     -0.00677 |       0.00000 |       0.00040 |       0.00334 |       0.83807
     -0.00634 |       0.00000 |       0.00039 |       0.00375 |       0.83956
Evaluating losses...
     -0.00808 |       0.00000 |       0.00037 |       0.00358 |       0.83901
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.07          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3567          |
| TimeElapsed     | 1.23e+04      |
| TimestepsSoFar  | 8675328       |
| ev_tdlam_before | 0.537         |
| loss_ent        | 0.839011      |
| loss_kl         | 0.0035781183  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.008083557  |
| loss_vf_loss    | 0.00036613355 |
-----------------------------------
********** Iteration 2118 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00077 |       0.00000 | 

********** Iteration 2123 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00103 |       0.00000 |       0.00148 |       0.00113 |       0.84084
     -0.00577 |       0.00000 |       0.00065 |       0.00306 |       0.84322
     -0.00789 |       0.00000 |       0.00058 |       0.00279 |       0.84253
     -0.00817 |       0.00000 |       0.00053 |       0.00293 |       0.84089
     -0.00915 |       0.00000 |       0.00050 |       0.00321 |       0.84073
     -0.00937 |       0.00000 |       0.00049 |       0.00394 |       0.84052
     -0.00946 |       0.00000 |       0.00047 |       0.00403 |       0.84093
     -0.01019 |       0.00000 |       0.00045 |       0.00461 |       0.84145
     -0.01040 |       0.00000 |       0.00043 |       0.00470 |       0.84238
     -0.01093 |       0.00000 |       0.00043 |       0.00504 |       0.84302
Evaluating losses...
     -0.01172 |       0.00000 |       0.00043 |       0.00551 |     

     -0.01309 |       0.00000 |      5.56e-05 |       0.00596 |       0.85496
     -0.01371 |       0.00000 |      5.47e-05 |       0.00646 |       0.85190
Evaluating losses...
     -0.01509 |       0.00000 |      5.54e-05 |       0.00622 |       0.85359
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.03         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3582         |
| TimeElapsed     | 1.24e+04     |
| TimestepsSoFar  | 8720384      |
| ev_tdlam_before | -1.39        |
| loss_ent        | 0.8535938    |
| loss_kl         | 0.0062158494 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.015094231 |
| loss_vf_loss    | 5.53607e-05  |
----------------------------------
********** Iteration 2129 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00070 |       0.00000 |       0.00374 |       0.00241 |       0.81836
     -0.00591 |       0.00000 |       0.00315 

********** Iteration 2134 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00101 |       0.00000 |       0.00270 |       0.00160 |       0.83605
     -0.00539 |       0.00000 |       0.00241 |       0.00290 |       0.83218
     -0.00698 |       0.00000 |       0.00232 |       0.00298 |       0.83666
     -0.00557 |       0.00000 |       0.00228 |       0.00628 |       0.83871
     -0.00686 |       0.00000 |       0.00226 |       0.00751 |       0.83709
     -0.00905 |       0.00000 |       0.00224 |       0.00453 |       0.83527
     -0.00959 |       0.00000 |       0.00223 |       0.00414 |       0.83538
     -0.01072 |       0.00000 |       0.00225 |       0.00393 |       0.83631
     -0.01113 |       0.00000 |       0.00223 |       0.00402 |       0.83550
     -0.01175 |       0.00000 |       0.00222 |       0.00413 |       0.83520
Evaluating losses...
     -0.01258 |       0.00000 |       0.00223 |       0.00427 |     

     -0.01266 |       0.00000 |       0.00012 |       0.00391 |       0.84296
     -0.01381 |       0.00000 |       0.00012 |       0.00408 |       0.84126
     -0.01422 |       0.00000 |       0.00012 |       0.00443 |       0.84139
     -0.01551 |       0.00000 |       0.00011 |       0.00489 |       0.84205
Evaluating losses...
     -0.01554 |       0.00000 |       0.00011 |       0.00575 |       0.84307
------------------------------------
| EpLenMean       | 3.02e+03       |
| EpRewMean       | -0.03          |
| EpThisIter      | 2              |
| EpisodesSoFar   | 3597           |
| TimeElapsed     | 1.26e+04       |
| TimestepsSoFar  | 8765440        |
| ev_tdlam_before | -0.409         |
| loss_ent        | 0.84307224     |
| loss_kl         | 0.005750689    |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.015544424   |
| loss_vf_loss    | 0.000107457425 |
------------------------------------
********** Iteration 2140 ************
Optimizing...
     pol_surr |   

********** Iteration 2145 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00119 |       0.00000 |       0.00191 |       0.00172 |       0.83629
     -0.00490 |       0.00000 |       0.00113 |       0.00265 |       0.83874
     -0.00751 |       0.00000 |       0.00097 |       0.00273 |       0.83657
     -0.00914 |       0.00000 |       0.00087 |       0.00271 |       0.83539
     -0.01000 |       0.00000 |       0.00081 |       0.00285 |       0.83629
     -0.01031 |       0.00000 |       0.00076 |       0.00277 |       0.83522
     -0.01099 |       0.00000 |       0.00074 |       0.00307 |       0.83518
     -0.01149 |       0.00000 |       0.00071 |       0.00311 |       0.83392
     -0.01186 |       0.00000 |       0.00067 |       0.00322 |       0.83477
     -0.01204 |       0.00000 |       0.00067 |       0.00356 |       0.83483
Evaluating losses...
     -0.01299 |       0.00000 |       0.00063 |       0.00374 |     

     -0.00708 |       0.00000 |       0.00093 |       0.00460 |       0.85347
     -0.00802 |       0.00000 |       0.00089 |       0.00452 |       0.85373
     -0.00784 |       0.00000 |       0.00086 |       0.00463 |       0.85365
Evaluating losses...
     -0.00883 |       0.00000 |       0.00082 |       0.00475 |       0.85460
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.09        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 3612         |
| TimeElapsed     | 1.26e+04     |
| TimestepsSoFar  | 8810496      |
| ev_tdlam_before | 0.283        |
| loss_ent        | 0.8546041    |
| loss_kl         | 0.0047457693 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008829603 |
| loss_vf_loss    | 0.0008229595 |
----------------------------------
********** Iteration 2151 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00201 |       0.00000 |       0.00087 

********** Iteration 2156 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00071 |       0.00000 |       0.00279 |       0.00110 |       0.83033
     -0.00645 |       0.00000 |       0.00210 |       0.00264 |       0.83027
     -0.00821 |       0.00000 |       0.00179 |       0.00225 |       0.82852
     -0.00943 |       0.00000 |       0.00162 |       0.00249 |       0.82917
     -0.01039 |       0.00000 |       0.00142 |       0.00284 |       0.82922
     -0.01093 |       0.00000 |       0.00131 |       0.00326 |       0.82894
     -0.01143 |       0.00000 |       0.00123 |       0.00328 |       0.82905
     -0.01179 |       0.00000 |       0.00113 |       0.00344 |       0.83059
     -0.01208 |       0.00000 |       0.00105 |       0.00361 |       0.82963
     -0.01227 |       0.00000 |       0.00103 |       0.00400 |       0.83080
Evaluating losses...
     -0.01322 |       0.00000 |       0.00094 |       0.00409 |     

     -0.00987 |       0.00000 |       0.00152 |       0.00481 |       0.79061
     -0.01037 |       0.00000 |       0.00146 |       0.00561 |       0.79022
     -0.01068 |       0.00000 |       0.00145 |       0.00571 |       0.79107
Evaluating losses...
     -0.01129 |       0.00000 |       0.00143 |       0.00578 |       0.79251
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.05        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 3627         |
| TimeElapsed     | 1.27e+04     |
| TimestepsSoFar  | 8855552      |
| ev_tdlam_before | 0.558        |
| loss_ent        | 0.7925054    |
| loss_kl         | 0.00577558   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.011287881 |
| loss_vf_loss    | 0.0014317147 |
----------------------------------
********** Iteration 2162 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00047 |       0.00000 |       0.00043 

********** Iteration 2167 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00011 |       0.00000 |       0.00128 |       0.00095 |       0.83911
     -0.00463 |       0.00000 |       0.00077 |       0.00184 |       0.83571
     -0.00620 |       0.00000 |       0.00073 |       0.00194 |       0.83391
     -0.00645 |       0.00000 |       0.00072 |       0.00238 |       0.83259
     -0.00690 |       0.00000 |       0.00070 |       0.00272 |       0.83105
     -0.00777 |       0.00000 |       0.00069 |       0.00306 |       0.83036
     -0.00818 |       0.00000 |       0.00070 |       0.00319 |       0.83099
     -0.00847 |       0.00000 |       0.00066 |       0.00356 |       0.83065
     -0.00884 |       0.00000 |       0.00067 |       0.00376 |       0.82999
     -0.00922 |       0.00000 |       0.00066 |       0.00419 |       0.82967
Evaluating losses...
     -0.00968 |       0.00000 |       0.00066 |       0.00441 |     

     -0.00865 |       0.00000 |       0.00148 |       0.00352 |       0.80020
     -0.00917 |       0.00000 |       0.00143 |       0.00363 |       0.80056
Evaluating losses...
     -0.00952 |       0.00000 |       0.00137 |       0.00387 |       0.80118
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.01          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 3642          |
| TimeElapsed     | 1.27e+04      |
| TimestepsSoFar  | 8900608       |
| ev_tdlam_before | 0.331         |
| loss_ent        | 0.80118155    |
| loss_kl         | 0.0038651992  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0095172515 |
| loss_vf_loss    | 0.0013671869  |
-----------------------------------
********** Iteration 2173 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00179 |       0.00000 |       0.00570 |       0.00237 |       0.76871
     -0.00561 |       0.00000 | 

********** Iteration 2178 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00024 |       0.00000 |       0.00133 |       0.00119 |       0.81246
     -0.00459 |       0.00000 |       0.00096 |       0.00157 |       0.81153
     -0.00601 |       0.00000 |       0.00088 |       0.00206 |       0.81167
     -0.00659 |       0.00000 |       0.00080 |       0.00236 |       0.81252
     -0.00842 |       0.00000 |       0.00073 |       0.00276 |       0.81397
     -0.00859 |       0.00000 |       0.00068 |       0.00295 |       0.81271
     -0.00902 |       0.00000 |       0.00062 |       0.00314 |       0.81299
     -0.00970 |       0.00000 |       0.00059 |       0.00354 |       0.81312
     -0.00960 |       0.00000 |       0.00055 |       0.00394 |       0.81499
     -0.01015 |       0.00000 |       0.00054 |       0.00424 |       0.81403
Evaluating losses...
     -0.01120 |       0.00000 |       0.00049 |       0.00453 |     

     -0.00818 |       0.00000 |      5.92e-05 |       0.00353 |       0.87644
     -0.00859 |       0.00000 |      5.88e-05 |       0.00368 |       0.87630
Evaluating losses...
     -0.01033 |       0.00000 |      6.17e-05 |       0.00366 |       0.87721
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.02        |
| EpThisIter      | 2            |
| EpisodesSoFar   | 3657         |
| TimeElapsed     | 1.27e+04     |
| TimestepsSoFar  | 8945664      |
| ev_tdlam_before | -1.2         |
| loss_ent        | 0.8772108    |
| loss_kl         | 0.0036602314 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010332048 |
| loss_vf_loss    | 6.171705e-05 |
----------------------------------
********** Iteration 2184 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00195 |       0.00000 |      5.40e-05 |       0.00150 |       0.83021
     -0.00577 |       0.00000 |      4.40e-05 

********** Iteration 2189 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00227 |       0.00000 |       0.00100 |       0.00175 |       0.82112
     -0.00722 |       0.00000 |       0.00065 |       0.00284 |       0.82206
     -0.00786 |       0.00000 |       0.00063 |       0.00303 |       0.82257
     -0.00866 |       0.00000 |       0.00058 |       0.00256 |       0.82203
     -0.00876 |       0.00000 |       0.00057 |       0.00287 |       0.82206
     -0.00899 |       0.00000 |       0.00055 |       0.00329 |       0.82381
     -0.00962 |       0.00000 |       0.00055 |       0.00301 |       0.82268
     -0.01002 |       0.00000 |       0.00054 |       0.00307 |       0.82295
     -0.00976 |       0.00000 |       0.00051 |       0.00341 |       0.82301
     -0.01028 |       0.00000 |       0.00051 |       0.00325 |       0.82212
Evaluating losses...
     -0.01081 |       0.00000 |       0.00050 |       0.00349 |     

     -0.00881 |       0.00000 |       0.00178 |       0.00348 |       0.84309
     -0.00894 |       0.00000 |       0.00173 |       0.00372 |       0.84218
     -0.00923 |       0.00000 |       0.00168 |       0.00373 |       0.84322
Evaluating losses...
     -0.00969 |       0.00000 |       0.00159 |       0.00397 |       0.84127
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.01         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 3672         |
| TimeElapsed     | 1.28e+04     |
| TimestepsSoFar  | 8990720      |
| ev_tdlam_before | 0.415        |
| loss_ent        | 0.84126514   |
| loss_kl         | 0.0039709588 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009694019 |
| loss_vf_loss    | 0.001588297  |
----------------------------------
********** Iteration 2195 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00042 |       0.00000 |       0.00243 

********** Iteration 2200 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     8.65e-05 |       0.00000 |       0.00248 |       0.00307 |       0.79964
     -0.00452 |       0.00000 |       0.00197 |       0.00422 |       0.80011
     -0.00577 |       0.00000 |       0.00188 |       0.00355 |       0.79970
     -0.00775 |       0.00000 |       0.00183 |       0.00377 |       0.79982
     -0.00835 |       0.00000 |       0.00175 |       0.00445 |       0.80046
     -0.00949 |       0.00000 |       0.00172 |       0.00446 |       0.80163
     -0.00945 |       0.00000 |       0.00174 |       0.00502 |       0.80173
     -0.01033 |       0.00000 |       0.00170 |       0.00541 |       0.80170
     -0.01046 |       0.00000 |       0.00169 |       0.00589 |       0.80160
     -0.01046 |       0.00000 |       0.00164 |       0.00715 |       0.80140
Evaluating losses...
     -0.01186 |       0.00000 |       0.00161 |       0.00673 |     

     -0.00838 |       0.00000 |       0.00035 |       0.00236 |       0.81864
     -0.00847 |       0.00000 |       0.00035 |       0.00240 |       0.81893
     -0.00912 |       0.00000 |       0.00031 |       0.00261 |       0.81873
Evaluating losses...
     -0.00930 |       0.00000 |       0.00031 |       0.00294 |       0.81794
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.01         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3686         |
| TimeElapsed     | 1.3e+04      |
| TimestepsSoFar  | 9035776      |
| ev_tdlam_before | 0.688        |
| loss_ent        | 0.8179445    |
| loss_kl         | 0.002939213  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009296278 |
| loss_vf_loss    | 0.0003070497 |
----------------------------------
********** Iteration 2206 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00086 |       0.00000 |       0.00344 

********** Iteration 2211 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00043 |       0.00000 |       0.00241 |       0.00324 |       0.79322
     -0.00415 |       0.00000 |       0.00210 |       0.00171 |       0.79079
     -0.00493 |       0.00000 |       0.00197 |       0.00196 |       0.78757
     -0.00617 |       0.00000 |       0.00187 |       0.00224 |       0.78962
     -0.00759 |       0.00000 |       0.00183 |       0.00281 |       0.78943
     -0.00613 |       0.00000 |       0.00177 |       0.00294 |       0.79216
     -0.00801 |       0.00000 |       0.00173 |       0.00292 |       0.78973
     -0.00951 |       0.00000 |       0.00168 |       0.00290 |       0.79001
     -0.00878 |       0.00000 |       0.00165 |       0.00340 |       0.78891
     -0.00918 |       0.00000 |       0.00161 |       0.00342 |       0.78871
Evaluating losses...
     -0.01059 |       0.00000 |       0.00153 |       0.00349 |     

     -0.01239 |       0.00000 |       0.00268 |       0.00447 |       0.79930
     -0.01261 |       0.00000 |       0.00266 |       0.00443 |       0.80053
Evaluating losses...
     -0.01382 |       0.00000 |       0.00256 |       0.00419 |       0.79983
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.07         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3701         |
| TimeElapsed     | 1.3e+04      |
| TimestepsSoFar  | 9080832      |
| ev_tdlam_before | 0.275        |
| loss_ent        | 0.7998311    |
| loss_kl         | 0.004194234  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.013821658 |
| loss_vf_loss    | 0.0025572418 |
----------------------------------
********** Iteration 2217 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00038 |       0.00000 |       0.00583 |       0.00382 |       0.79701
     -0.00423 |       0.00000 |       0.00471 

********** Iteration 2222 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00170 |       0.00000 |       0.00344 |       0.00137 |       0.83106
     -0.00395 |       0.00000 |       0.00245 |       0.00183 |       0.82869
     -0.00600 |       0.00000 |       0.00177 |       0.00164 |       0.82971
     -0.00738 |       0.00000 |       0.00134 |       0.00222 |       0.82932
     -0.00804 |       0.00000 |       0.00123 |       0.00264 |       0.82774
     -0.00862 |       0.00000 |       0.00115 |       0.00272 |       0.82894
     -0.00912 |       0.00000 |       0.00111 |       0.00321 |       0.82863
     -0.00970 |       0.00000 |       0.00111 |       0.00354 |       0.82743
     -0.00996 |       0.00000 |       0.00105 |       0.00375 |       0.82723
     -0.01021 |       0.00000 |       0.00104 |       0.00369 |       0.82703
Evaluating losses...
     -0.01095 |       0.00000 |       0.00099 |       0.00376 |     

     -0.00961 |       0.00000 |       0.00098 |       0.00405 |       0.85239
     -0.01057 |       0.00000 |       0.00097 |       0.00444 |       0.85240
Evaluating losses...
     -0.01141 |       0.00000 |       0.00095 |       0.00419 |       0.85309
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.13          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3716          |
| TimeElapsed     | 1.31e+04      |
| TimestepsSoFar  | 9125888       |
| ev_tdlam_before | 0.492         |
| loss_ent        | 0.85309184    |
| loss_kl         | 0.0041882866  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.011410891  |
| loss_vf_loss    | 0.00094732194 |
-----------------------------------
********** Iteration 2228 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00015 |       0.00000 |       0.00019 |       0.00142 |       0.82962
     -0.00765 |       0.00000 | 

********** Iteration 2233 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00110 |       0.00000 |       0.00199 |       0.00121 |       0.80191
     -0.00250 |       0.00000 |       0.00149 |       0.00339 |       0.80260
     -0.00552 |       0.00000 |       0.00127 |       0.00223 |       0.80235
     -0.00594 |       0.00000 |       0.00107 |       0.00208 |       0.80210
     -0.00642 |       0.00000 |       0.00093 |       0.00215 |       0.80256
     -0.00705 |       0.00000 |       0.00080 |       0.00233 |       0.80243
     -0.00705 |       0.00000 |       0.00073 |       0.00237 |       0.80200
     -0.00786 |       0.00000 |       0.00067 |       0.00224 |       0.80240
     -0.00819 |       0.00000 |       0.00062 |       0.00237 |       0.80270
     -0.00833 |       0.00000 |       0.00060 |       0.00250 |       0.80256
Evaluating losses...
     -0.00851 |       0.00000 |       0.00053 |       0.00264 |     

     -0.00749 |       0.00000 |       0.00077 |       0.00374 |       0.82572
     -0.00850 |       0.00000 |       0.00075 |       0.00325 |       0.82913
     -0.00900 |       0.00000 |       0.00072 |       0.00362 |       0.83005
Evaluating losses...
     -0.00958 |       0.00000 |       0.00065 |       0.00374 |       0.82831
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.09         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3731         |
| TimeElapsed     | 1.31e+04     |
| TimestepsSoFar  | 9170944      |
| ev_tdlam_before | -0.0649      |
| loss_ent        | 0.828309     |
| loss_kl         | 0.0037387204 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009577589 |
| loss_vf_loss    | 0.0006547001 |
----------------------------------
********** Iteration 2239 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     1.58e-05 |       0.00000 |       0.00041 

********** Iteration 2244 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00031 |       0.00000 |       0.00534 |       0.00155 |       0.85097
     -0.00613 |       0.00000 |       0.00429 |       0.00351 |       0.84875
     -0.00827 |       0.00000 |       0.00400 |       0.00511 |       0.84860
     -0.01010 |       0.00000 |       0.00373 |       0.00457 |       0.84828
     -0.01122 |       0.00000 |       0.00354 |       0.00456 |       0.84582
     -0.01143 |       0.00000 |       0.00336 |       0.00453 |       0.84591
     -0.01182 |       0.00000 |       0.00327 |       0.00465 |       0.84550
     -0.01212 |       0.00000 |       0.00318 |       0.00512 |       0.84642
     -0.01257 |       0.00000 |       0.00309 |       0.00555 |       0.84569
     -0.01318 |       0.00000 |       0.00301 |       0.00537 |       0.84633
Evaluating losses...
     -0.01368 |       0.00000 |       0.00294 |       0.00565 |     

     -0.00737 |       0.00000 |       0.00053 |       0.00332 |       0.81327
     -0.00731 |       0.00000 |       0.00052 |       0.00362 |       0.81585
Evaluating losses...
     -0.00832 |       0.00000 |       0.00050 |       0.00358 |       0.81511
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.02          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 3747          |
| TimeElapsed     | 1.31e+04      |
| TimestepsSoFar  | 9216000       |
| ev_tdlam_before | 0.634         |
| loss_ent        | 0.81510806    |
| loss_kl         | 0.0035832427  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.008319145  |
| loss_vf_loss    | 0.00049772725 |
-----------------------------------
********** Iteration 2250 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00175 |       0.00000 |       0.00254 |       0.00137 |       0.82247
     -0.00680 |       0.00000 | 

********** Iteration 2255 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00357 |       0.00000 |       0.00274 |       0.00258 |       0.78013
     -0.00147 |       0.00000 |       0.00212 |       0.01061 |       0.77196
     -0.00619 |       0.00000 |       0.00195 |       0.00978 |       0.76972
     -0.00795 |       0.00000 |       0.00189 |       0.00561 |       0.77134
     -0.00852 |       0.00000 |       0.00185 |       0.00524 |       0.77339
     -0.00902 |       0.00000 |       0.00177 |       0.00496 |       0.77497
     -0.00991 |       0.00000 |       0.00178 |       0.00476 |       0.77424
     -0.01024 |       0.00000 |       0.00171 |       0.00480 |       0.77411
     -0.01042 |       0.00000 |       0.00166 |       0.00482 |       0.77435
     -0.01047 |       0.00000 |       0.00165 |       0.00521 |       0.77316
Evaluating losses...
     -0.01145 |       0.00000 |       0.00158 |       0.00508 |     

     -0.01175 |       0.00000 |      1.79e-05 |       0.00366 |       0.79494
     -0.01232 |       0.00000 |      1.70e-05 |       0.00378 |       0.79404
     -0.01259 |       0.00000 |      1.63e-05 |       0.00439 |       0.79387
     -0.01332 |       0.00000 |      1.61e-05 |       0.00451 |       0.79423
Evaluating losses...
     -0.01441 |       0.00000 |      1.64e-05 |       0.00442 |       0.79326
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.04          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3761          |
| TimeElapsed     | 1.34e+04      |
| TimestepsSoFar  | 9261056       |
| ev_tdlam_before | -2.01         |
| loss_ent        | 0.79325837    |
| loss_kl         | 0.004424968   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.014410885  |
| loss_vf_loss    | 1.6363174e-05 |
-----------------------------------
********** Iteration 2261 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 2266 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00103 |       0.00000 |       0.00167 |       0.00174 |       0.82763
     -0.00602 |       0.00000 |       0.00119 |       0.00218 |       0.82841
     -0.00720 |       0.00000 |       0.00105 |       0.00279 |       0.82760
     -0.00781 |       0.00000 |       0.00095 |       0.00313 |       0.82883
     -0.00844 |       0.00000 |       0.00082 |       0.00378 |       0.82832
     -0.00900 |       0.00000 |       0.00080 |       0.00360 |       0.82857
     -0.00937 |       0.00000 |       0.00073 |       0.00375 |       0.82809
     -0.00947 |       0.00000 |       0.00071 |       0.00426 |       0.82789
     -0.00982 |       0.00000 |       0.00067 |       0.00450 |       0.82825
     -0.01018 |       0.00000 |       0.00061 |       0.00392 |       0.82815
Evaluating losses...
     -0.01083 |       0.00000 |       0.00058 |       0.00434 |     

     -0.00992 |       0.00000 |      2.59e-05 |       0.00462 |       0.80823
     -0.01072 |       0.00000 |      2.49e-05 |       0.00470 |       0.81076
     -0.01089 |       0.00000 |      2.35e-05 |       0.00480 |       0.81001
Evaluating losses...
     -0.01170 |       0.00000 |      2.26e-05 |       0.00463 |       0.81165
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.02          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3776          |
| TimeElapsed     | 1.34e+04      |
| TimestepsSoFar  | 9306112       |
| ev_tdlam_before | -2.59         |
| loss_ent        | 0.811655      |
| loss_kl         | 0.004631574   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.011700617  |
| loss_vf_loss    | 2.2567257e-05 |
-----------------------------------
********** Iteration 2272 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00189 |       0.00000 | 

********** Iteration 2277 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00074 |       0.00000 |       0.00366 |       0.00227 |       0.80139
     -0.00452 |       0.00000 |       0.00269 |       0.00247 |       0.80121
     -0.00622 |       0.00000 |       0.00206 |       0.00244 |       0.80111
     -0.00713 |       0.00000 |       0.00176 |       0.00185 |       0.80226
     -0.00814 |       0.00000 |       0.00159 |       0.00237 |       0.80188
     -0.00860 |       0.00000 |       0.00150 |       0.00226 |       0.80319
     -0.00892 |       0.00000 |       0.00143 |       0.00233 |       0.80304
     -0.00931 |       0.00000 |       0.00137 |       0.00252 |       0.80323
     -0.00832 |       0.00000 |       0.00136 |       0.00334 |       0.80452
     -0.00965 |       0.00000 |       0.00130 |       0.00298 |       0.80473
Evaluating losses...
     -0.01008 |       0.00000 |       0.00123 |       0.00290 |     

     -0.01127 |       0.00000 |       0.00061 |       0.00512 |       0.80513
     -0.01149 |       0.00000 |       0.00060 |       0.00545 |       0.80599
Evaluating losses...
     -0.01225 |       0.00000 |       0.00055 |       0.00536 |       0.80447
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.1           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3791          |
| TimeElapsed     | 1.35e+04      |
| TimestepsSoFar  | 9351168       |
| ev_tdlam_before | 0.593         |
| loss_ent        | 0.8044662     |
| loss_kl         | 0.005363789   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.012249912  |
| loss_vf_loss    | 0.00054977275 |
-----------------------------------
********** Iteration 2283 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00075 |       0.00000 |       0.00078 |       0.00176 |       0.82409
     -0.00581 |       0.00000 | 

********** Iteration 2288 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00137 |       0.00000 |       0.00185 |       0.00085 |       0.78813
     -0.00235 |       0.00000 |       0.00112 |       0.00173 |       0.78685
     -0.00437 |       0.00000 |       0.00092 |       0.00204 |       0.78750
     -0.00441 |       0.00000 |       0.00087 |       0.00245 |       0.78393
     -0.00476 |       0.00000 |       0.00084 |       0.00244 |       0.78577
     -0.00597 |       0.00000 |       0.00082 |       0.00253 |       0.78682
     -0.00557 |       0.00000 |       0.00082 |       0.00286 |       0.78643
     -0.00661 |       0.00000 |       0.00081 |       0.00295 |       0.78707
     -0.00695 |       0.00000 |       0.00082 |       0.00309 |       0.78681
     -0.00706 |       0.00000 |       0.00080 |       0.00308 |       0.78655
Evaluating losses...
     -0.00766 |       0.00000 |       0.00078 |       0.00331 |     

     -0.00938 |       0.00000 |       0.00230 |       0.00367 |       0.76051
     -0.01002 |       0.00000 |       0.00228 |       0.00412 |       0.76307
     -0.01007 |       0.00000 |       0.00225 |       0.00520 |       0.76191
Evaluating losses...
     -0.01111 |       0.00000 |       0.00219 |       0.00525 |       0.76194
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.06         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3806         |
| TimeElapsed     | 1.35e+04     |
| TimestepsSoFar  | 9396224      |
| ev_tdlam_before | 0.726        |
| loss_ent        | 0.7619412    |
| loss_kl         | 0.0052531958 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.011112623 |
| loss_vf_loss    | 0.0021887398 |
----------------------------------
********** Iteration 2294 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00246 |       0.00000 |       0.00695 

********** Iteration 2299 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00139 |       0.00000 |       0.00298 |       0.00419 |       0.78059
     -0.00462 |       0.00000 |       0.00222 |       0.00477 |       0.77916
     -0.00618 |       0.00000 |       0.00202 |       0.00291 |       0.78281
     -0.00756 |       0.00000 |       0.00189 |       0.00256 |       0.78482
     -0.00828 |       0.00000 |       0.00184 |       0.00243 |       0.78542
     -0.00867 |       0.00000 |       0.00176 |       0.00292 |       0.78579
     -0.00910 |       0.00000 |       0.00169 |       0.00290 |       0.78397
     -0.00964 |       0.00000 |       0.00164 |       0.00372 |       0.78642
     -0.01056 |       0.00000 |       0.00161 |       0.00377 |       0.78768
     -0.01036 |       0.00000 |       0.00159 |       0.00360 |       0.78677
Evaluating losses...
     -0.01123 |       0.00000 |       0.00151 |       0.00360 |     

     -0.01070 |       0.00000 |       0.00016 |       0.00394 |       0.82020
     -0.01166 |       0.00000 |       0.00015 |       0.00404 |       0.82115
     -0.01190 |       0.00000 |       0.00014 |       0.00418 |       0.82138
Evaluating losses...
     -0.01351 |       0.00000 |       0.00014 |       0.00431 |       0.82181
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.04         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3821          |
| TimeElapsed     | 1.35e+04      |
| TimestepsSoFar  | 9441280       |
| ev_tdlam_before | -2.06         |
| loss_ent        | 0.8218089     |
| loss_kl         | 0.00431349    |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.013508153  |
| loss_vf_loss    | 0.00014016917 |
-----------------------------------
********** Iteration 2305 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00059 |       0.00000 | 

********** Iteration 2310 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00018 |       0.00000 |       0.00011 |       0.00099 |       0.78960
     -0.00544 |       0.00000 |      6.17e-05 |       0.00196 |       0.78598
     -0.00685 |       0.00000 |      5.48e-05 |       0.00195 |       0.78878
     -0.00835 |       0.00000 |      5.03e-05 |       0.00207 |       0.78879
     -0.00849 |       0.00000 |      4.79e-05 |       0.00236 |       0.78756
     -0.00974 |       0.00000 |      4.67e-05 |       0.00259 |       0.78817
     -0.00981 |       0.00000 |      4.33e-05 |       0.00304 |       0.78797
     -0.01053 |       0.00000 |      4.14e-05 |       0.00322 |       0.78794
     -0.01113 |       0.00000 |      4.01e-05 |       0.00338 |       0.78842
     -0.01097 |       0.00000 |      3.95e-05 |       0.00381 |       0.78773
Evaluating losses...
     -0.01250 |       0.00000 |      3.77e-05 |       0.00363 |     

     -0.01056 |       0.00000 |       0.00198 |       0.00387 |       0.75275
     -0.01059 |       0.00000 |       0.00199 |       0.00396 |       0.75376
     -0.01116 |       0.00000 |       0.00195 |       0.00412 |       0.75352
Evaluating losses...
     -0.01198 |       0.00000 |       0.00192 |       0.00411 |       0.75381
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0            |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3836         |
| TimeElapsed     | 1.36e+04     |
| TimestepsSoFar  | 9486336      |
| ev_tdlam_before | 0.668        |
| loss_ent        | 0.75381017   |
| loss_kl         | 0.0041070813 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.011978104 |
| loss_vf_loss    | 0.0019158917 |
----------------------------------
********** Iteration 2316 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00174 |       0.00000 |       0.00024 

********** Iteration 2321 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.08e-05 |       0.00000 |       0.00022 |       0.00149 |       0.77139
     -0.00448 |       0.00000 |       0.00010 |       0.00204 |       0.76784
     -0.00599 |       0.00000 |      7.61e-05 |       0.00292 |       0.76707
     -0.00666 |       0.00000 |      6.39e-05 |       0.00318 |       0.76725
     -0.00730 |       0.00000 |      5.65e-05 |       0.00311 |       0.76813
     -0.00772 |       0.00000 |      5.14e-05 |       0.00337 |       0.76905
     -0.00816 |       0.00000 |      4.79e-05 |       0.00371 |       0.76919
     -0.00831 |       0.00000 |      4.48e-05 |       0.00414 |       0.76918
     -0.00865 |       0.00000 |      4.30e-05 |       0.00420 |       0.76993
     -0.00887 |       0.00000 |      4.14e-05 |       0.00463 |       0.77024
Evaluating losses...
     -0.00969 |       0.00000 |      4.00e-05 |       0.00520 |     

     -0.00824 |       0.00000 |       0.00176 |       0.00278 |       0.77606
     -0.00842 |       0.00000 |       0.00174 |       0.00307 |       0.77742
     -0.00826 |       0.00000 |       0.00165 |       0.00341 |       0.77613
Evaluating losses...
     -0.00905 |       0.00000 |       0.00164 |       0.00357 |       0.77561
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | -0.04        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3851         |
| TimeElapsed     | 1.38e+04     |
| TimestepsSoFar  | 9531392      |
| ev_tdlam_before | 0.245        |
| loss_ent        | 0.7756113    |
| loss_kl         | 0.0035729802 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009050422 |
| loss_vf_loss    | 0.001641753  |
----------------------------------
********** Iteration 2327 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00087 |       0.00000 |       0.00098 

********** Iteration 2332 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00046 |       0.00000 |       0.00146 |       0.00092 |       0.76624
     -0.00518 |       0.00000 |       0.00083 |       0.00234 |       0.76730
     -0.00752 |       0.00000 |       0.00076 |       0.00237 |       0.76577
     -0.00800 |       0.00000 |       0.00071 |       0.00269 |       0.76545
     -0.00901 |       0.00000 |       0.00069 |       0.00356 |       0.76559
     -0.01032 |       0.00000 |       0.00066 |       0.00320 |       0.76573
     -0.00912 |       0.00000 |       0.00063 |       0.00405 |       0.76381
     -0.01131 |       0.00000 |       0.00062 |       0.00393 |       0.76571
     -0.01140 |       0.00000 |       0.00061 |       0.00491 |       0.76325
     -0.01155 |       0.00000 |       0.00060 |       0.00475 |       0.76569
Evaluating losses...
     -0.01268 |       0.00000 |       0.00058 |       0.00501 |     

     -0.01149 |       0.00000 |       0.00136 |       0.00358 |       0.74674
     -0.01164 |       0.00000 |       0.00133 |       0.00366 |       0.74589
     -0.01228 |       0.00000 |       0.00131 |       0.00401 |       0.74754
Evaluating losses...
     -0.01285 |       0.00000 |       0.00123 |       0.00395 |       0.74707
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.01         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3866         |
| TimeElapsed     | 1.38e+04     |
| TimestepsSoFar  | 9576448      |
| ev_tdlam_before | 0.416        |
| loss_ent        | 0.7470739    |
| loss_kl         | 0.0039497963 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.012853226 |
| loss_vf_loss    | 0.0012321107 |
----------------------------------
********** Iteration 2338 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00268 |       0.00000 |       0.00025 

********** Iteration 2343 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00177 |       0.00000 |       0.00187 |       0.00042 |       0.77296
     -0.00528 |       0.00000 |       0.00163 |       0.00100 |       0.77256
     -0.00512 |       0.00000 |       0.00149 |       0.00188 |       0.77240
     -0.00620 |       0.00000 |       0.00137 |       0.00180 |       0.77148
     -0.00688 |       0.00000 |       0.00127 |       0.00208 |       0.77044
     -0.00741 |       0.00000 |       0.00122 |       0.00213 |       0.77067
     -0.00784 |       0.00000 |       0.00121 |       0.00222 |       0.77015
     -0.00783 |       0.00000 |       0.00119 |       0.00240 |       0.76984
     -0.00789 |       0.00000 |       0.00113 |       0.00268 |       0.77009
     -0.00823 |       0.00000 |       0.00114 |       0.00265 |       0.76970
Evaluating losses...
     -0.00867 |       0.00000 |       0.00107 |       0.00282 |     

     -0.00505 |       0.00000 |       0.00064 |       0.00291 |       0.74185
     -0.00544 |       0.00000 |       0.00061 |       0.00318 |       0.74133
Evaluating losses...
     -0.00598 |       0.00000 |       0.00059 |       0.00344 |       0.74199
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | -0.02         |
| EpThisIter      | 1             |
| EpisodesSoFar   | 3881          |
| TimeElapsed     | 1.39e+04      |
| TimestepsSoFar  | 9621504       |
| ev_tdlam_before | 0.357         |
| loss_ent        | 0.7419878     |
| loss_kl         | 0.0034366017  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0059813536 |
| loss_vf_loss    | 0.0005927026  |
-----------------------------------
********** Iteration 2349 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00056 |       0.00000 |       0.00160 |       0.00071 |       0.77684
     -0.00488 |       0.00000 | 

********** Iteration 2354 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00174 |       0.00000 |       0.00106 |       0.00198 |       0.73053
     -0.00250 |       0.00000 |       0.00088 |       0.00174 |       0.73250
     -0.00386 |       0.00000 |       0.00082 |       0.00150 |       0.73536
     -0.00491 |       0.00000 |       0.00075 |       0.00159 |       0.73639
     -0.00566 |       0.00000 |       0.00068 |       0.00198 |       0.73644
     -0.00617 |       0.00000 |       0.00059 |       0.00202 |       0.73718
     -0.00646 |       0.00000 |       0.00057 |       0.00191 |       0.73656
     -0.00687 |       0.00000 |       0.00052 |       0.00183 |       0.73689
     -0.00718 |       0.00000 |       0.00048 |       0.00195 |       0.73657
     -0.00709 |       0.00000 |       0.00048 |       0.00249 |       0.73526
Evaluating losses...
     -0.00724 |       0.00000 |       0.00044 |       0.00332 |     

     -0.00964 |       0.00000 |       0.00327 |       0.00378 |       0.74787
     -0.01016 |       0.00000 |       0.00323 |       0.00430 |       0.74643
     -0.01006 |       0.00000 |       0.00321 |       0.00469 |       0.74679
Evaluating losses...
     -0.01079 |       0.00000 |       0.00312 |       0.00520 |       0.74602
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | -0.01        |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3896         |
| TimeElapsed     | 1.39e+04     |
| TimestepsSoFar  | 9666560      |
| ev_tdlam_before | 0.566        |
| loss_ent        | 0.74601513   |
| loss_kl         | 0.005204624  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010790495 |
| loss_vf_loss    | 0.0031229923 |
----------------------------------
********** Iteration 2360 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00095 |       0.00000 |       0.00368 

********** Iteration 2365 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00080 |       0.00000 |       0.00281 |       0.00070 |       0.76957
     -0.00336 |       0.00000 |       0.00208 |       0.00134 |       0.77069
     -0.00483 |       0.00000 |       0.00188 |       0.00156 |       0.77138
     -0.00584 |       0.00000 |       0.00172 |       0.00188 |       0.77130
     -0.00642 |       0.00000 |       0.00165 |       0.00187 |       0.77121
     -0.00714 |       0.00000 |       0.00159 |       0.00232 |       0.77124
     -0.00757 |       0.00000 |       0.00160 |       0.00249 |       0.77100
     -0.00757 |       0.00000 |       0.00152 |       0.00288 |       0.77169
     -0.00838 |       0.00000 |       0.00148 |       0.00307 |       0.77209
     -0.00838 |       0.00000 |       0.00146 |       0.00327 |       0.77171
Evaluating losses...
     -0.00916 |       0.00000 |       0.00139 |       0.00354 |     

     -0.00719 |       0.00000 |       0.00087 |       0.00370 |       0.76681
     -0.00713 |       0.00000 |       0.00088 |       0.00313 |       0.76478
     -0.00764 |       0.00000 |       0.00089 |       0.00346 |       0.76378
Evaluating losses...
     -0.00809 |       0.00000 |       0.00084 |       0.00433 |       0.76607
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.04         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 3911         |
| TimeElapsed     | 1.4e+04      |
| TimestepsSoFar  | 9711616      |
| ev_tdlam_before | 0.727        |
| loss_ent        | 0.7660739    |
| loss_kl         | 0.004330846  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008094972 |
| loss_vf_loss    | 0.0008445716 |
----------------------------------
********** Iteration 2371 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00051 |       0.00000 |      6.96e-05 

********** Iteration 2376 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00040 |       0.00000 |       0.00230 |       0.00161 |       0.77089
     -0.00366 |       0.00000 |       0.00183 |       0.00263 |       0.76731
     -0.00415 |       0.00000 |       0.00173 |       0.00281 |       0.76769
     -0.00574 |       0.00000 |       0.00165 |       0.00228 |       0.76801
     -0.00641 |       0.00000 |       0.00161 |       0.00238 |       0.76876
     -0.00701 |       0.00000 |       0.00157 |       0.00267 |       0.76871
     -0.00758 |       0.00000 |       0.00150 |       0.00292 |       0.77046
     -0.00791 |       0.00000 |       0.00151 |       0.00317 |       0.76977
     -0.00830 |       0.00000 |       0.00148 |       0.00357 |       0.76993
     -0.00814 |       0.00000 |       0.00142 |       0.00376 |       0.77117
Evaluating losses...
     -0.00925 |       0.00000 |       0.00138 |       0.00364 |     

     -0.00902 |       0.00000 |       0.00137 |       0.00397 |       0.77240
     -0.00914 |       0.00000 |       0.00136 |       0.00415 |       0.77098
     -0.00937 |       0.00000 |       0.00134 |       0.00438 |       0.77316
Evaluating losses...
     -0.00988 |       0.00000 |       0.00134 |       0.00452 |       0.77282
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.07         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 3926         |
| TimeElapsed     | 1.42e+04     |
| TimestepsSoFar  | 9756672      |
| ev_tdlam_before | 0.578        |
| loss_ent        | 0.77282053   |
| loss_kl         | 0.0045194225 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009876957 |
| loss_vf_loss    | 0.0013377726 |
----------------------------------
********** Iteration 2382 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00254 |       0.00000 |       0.00010 

********** Iteration 2387 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00191 |       0.00000 |       0.00297 |       0.00654 |       0.81125
     -0.00327 |       0.00000 |       0.00219 |       0.00449 |       0.81424
     -0.00536 |       0.00000 |       0.00194 |       0.00479 |       0.81498
     -0.00634 |       0.00000 |       0.00188 |       0.00433 |       0.81464
     -0.00698 |       0.00000 |       0.00181 |       0.00400 |       0.81337
     -0.00705 |       0.00000 |       0.00174 |       0.00484 |       0.81450
     -0.00826 |       0.00000 |       0.00169 |       0.00540 |       0.81433
     -0.00831 |       0.00000 |       0.00166 |       0.00504 |       0.81336
     -0.00891 |       0.00000 |       0.00164 |       0.00468 |       0.81257
     -0.00874 |       0.00000 |       0.00162 |       0.00559 |       0.81179
Evaluating losses...
     -0.00957 |       0.00000 |       0.00156 |       0.00616 |     

     -0.01454 |       0.00000 |      3.50e-05 |       0.00381 |       0.81051
     -0.01468 |       0.00000 |      3.33e-05 |       0.00421 |       0.81140
Evaluating losses...
     -0.01605 |       0.00000 |      3.22e-05 |       0.00400 |       0.80947
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.1           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 3941          |
| TimeElapsed     | 1.42e+04      |
| TimestepsSoFar  | 9801728       |
| ev_tdlam_before | -0.392        |
| loss_ent        | 0.8094678     |
| loss_kl         | 0.003998143   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.016046401  |
| loss_vf_loss    | 3.2203418e-05 |
-----------------------------------
********** Iteration 2393 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00045 |       0.00000 |       0.00038 |       0.00098 |       0.84171
     -0.00435 |       0.00000 | 

********** Iteration 2398 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00121 |       0.00000 |       0.00246 |       0.00073 |       0.76936
     -0.00376 |       0.00000 |       0.00150 |       0.00146 |       0.77153
     -0.00471 |       0.00000 |       0.00136 |       0.00142 |       0.76914
     -0.00601 |       0.00000 |       0.00127 |       0.00181 |       0.77010
     -0.00716 |       0.00000 |       0.00122 |       0.00210 |       0.76891
     -0.00751 |       0.00000 |       0.00113 |       0.00217 |       0.76843
     -0.00737 |       0.00000 |       0.00109 |       0.00249 |       0.77081
     -0.00823 |       0.00000 |       0.00109 |       0.00278 |       0.77129
     -0.00883 |       0.00000 |       0.00102 |       0.00286 |       0.77097
     -0.00929 |       0.00000 |       0.00096 |       0.00292 |       0.77086
Evaluating losses...
     -0.00994 |       0.00000 |       0.00093 |       0.00298 |     

     -0.01146 |       0.00000 |      3.49e-05 |       0.00330 |       0.79586
     -0.01251 |       0.00000 |      3.37e-05 |       0.00359 |       0.79609
     -0.01300 |       0.00000 |      3.19e-05 |       0.00340 |       0.79610
Evaluating losses...
     -0.01538 |       0.00000 |      3.15e-05 |       0.00335 |       0.79616
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.15          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 3956          |
| TimeElapsed     | 1.43e+04      |
| TimestepsSoFar  | 9846784       |
| ev_tdlam_before | -0.653        |
| loss_ent        | 0.7961568     |
| loss_kl         | 0.0033468886  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.01538427   |
| loss_vf_loss    | 3.1502317e-05 |
-----------------------------------
********** Iteration 2404 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00131 |       0.00000 | 

********** Iteration 2409 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00612 |       0.00000 |       0.00039 |       0.00176 |       0.78910
     -0.00249 |       0.00000 |       0.00022 |       0.00238 |       0.79162
     -0.00465 |       0.00000 |       0.00018 |       0.00192 |       0.79071
     -0.00552 |       0.00000 |       0.00017 |       0.00287 |       0.79151
     -0.00844 |       0.00000 |       0.00016 |       0.00254 |       0.78866
     -0.00959 |       0.00000 |       0.00015 |       0.00258 |       0.79059
     -0.01066 |       0.00000 |       0.00014 |       0.00279 |       0.79033
     -0.01103 |       0.00000 |       0.00014 |       0.00311 |       0.79081
     -0.01201 |       0.00000 |       0.00013 |       0.00332 |       0.78989
     -0.01224 |       0.00000 |       0.00013 |       0.00345 |       0.79142
Evaluating losses...
     -0.01382 |       0.00000 |       0.00013 |       0.00346 |     

     -0.00738 |       0.00000 |       0.00146 |       0.00395 |       0.78954
     -0.00824 |       0.00000 |       0.00146 |       0.00428 |       0.78893
     -0.00814 |       0.00000 |       0.00143 |       0.00442 |       0.78927
Evaluating losses...
     -0.00943 |       0.00000 |       0.00139 |       0.00422 |       0.78823
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.23          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 3971          |
| TimeElapsed     | 1.43e+04      |
| TimestepsSoFar  | 9891840       |
| ev_tdlam_before | 0.712         |
| loss_ent        | 0.7882342     |
| loss_kl         | 0.004218162   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0094252955 |
| loss_vf_loss    | 0.0013867639  |
-----------------------------------
********** Iteration 2415 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     7.97e-05 |       0.00000 | 

********** Iteration 2420 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00014 |       0.00000 |       0.00610 |       0.00063 |       0.75921
     -0.00453 |       0.00000 |       0.00506 |       0.00142 |       0.75844
     -0.00661 |       0.00000 |       0.00427 |       0.00146 |       0.76027
     -0.00733 |       0.00000 |       0.00382 |       0.00188 |       0.75862
     -0.00820 |       0.00000 |       0.00362 |       0.00199 |       0.75937
     -0.00896 |       0.00000 |       0.00345 |       0.00220 |       0.75929
     -0.00960 |       0.00000 |       0.00336 |       0.00242 |       0.76021
     -0.00985 |       0.00000 |       0.00318 |       0.00263 |       0.75993
     -0.01059 |       0.00000 |       0.00315 |       0.00287 |       0.76146
     -0.01059 |       0.00000 |       0.00305 |       0.00333 |       0.75981
Evaluating losses...
     -0.01154 |       0.00000 |       0.00296 |       0.00322 |     

     -0.01027 |       0.00000 |       0.00079 |       0.00309 |       0.77168
     -0.01034 |       0.00000 |       0.00078 |       0.00345 |       0.77325
Evaluating losses...
     -0.01191 |       0.00000 |       0.00075 |       0.00350 |       0.77391
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.2           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 3986          |
| TimeElapsed     | 1.43e+04      |
| TimestepsSoFar  | 9936896       |
| ev_tdlam_before | 0.495         |
| loss_ent        | 0.77390814    |
| loss_kl         | 0.0035002003  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.011914737  |
| loss_vf_loss    | 0.00074624224 |
-----------------------------------
********** Iteration 2426 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00055 |       0.00000 |       0.00214 |       0.00047 |       0.78222
     -0.00266 |       0.00000 | 

********** Iteration 2431 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00237 |       0.00000 |       0.00630 |       0.00216 |       0.75910
     -0.00360 |       0.00000 |       0.00609 |       0.00204 |       0.76115
     -0.00602 |       0.00000 |       0.00598 |       0.00247 |       0.76396
     -0.00733 |       0.00000 |       0.00584 |       0.00250 |       0.76276
     -0.00865 |       0.00000 |       0.00574 |       0.00310 |       0.76251
     -0.00896 |       0.00000 |       0.00561 |       0.00318 |       0.76201
     -0.00959 |       0.00000 |       0.00557 |       0.00330 |       0.76218
     -0.01022 |       0.00000 |       0.00546 |       0.00365 |       0.76130
     -0.01034 |       0.00000 |       0.00539 |       0.00385 |       0.75956
     -0.01109 |       0.00000 |       0.00532 |       0.00378 |       0.76040
Evaluating losses...
     -0.01148 |       0.00000 |       0.00519 |       0.00394 |     

     -0.00771 |       0.00000 |       0.00181 |       0.00282 |       0.76390
     -0.00795 |       0.00000 |       0.00175 |       0.00319 |       0.76403
     -0.00892 |       0.00000 |       0.00171 |       0.00324 |       0.76490
Evaluating losses...
     -0.00981 |       0.00000 |       0.00167 |       0.00382 |       0.76505
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.25         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4001         |
| TimeElapsed     | 1.44e+04     |
| TimestepsSoFar  | 9981952      |
| ev_tdlam_before | 0.605        |
| loss_ent        | 0.76504815   |
| loss_kl         | 0.0038232675 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009812282 |
| loss_vf_loss    | 0.0016659347 |
----------------------------------
********** Iteration 2437 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00022 |       0.00000 |       0.00374 

********** Iteration 2442 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00180 |       0.00000 |       0.00248 |       0.00101 |       0.75859
     -0.00423 |       0.00000 |       0.00199 |       0.00208 |       0.75732
     -0.00703 |       0.00000 |       0.00179 |       0.00213 |       0.75843
     -0.00793 |       0.00000 |       0.00168 |       0.00265 |       0.75787
     -0.00871 |       0.00000 |       0.00159 |       0.00284 |       0.75692
     -0.00969 |       0.00000 |       0.00153 |       0.00300 |       0.75741
     -0.01023 |       0.00000 |       0.00154 |       0.00314 |       0.75690
     -0.01019 |       0.00000 |       0.00147 |       0.00352 |       0.75605
     -0.01062 |       0.00000 |       0.00144 |       0.00369 |       0.75651
     -0.01086 |       0.00000 |       0.00141 |       0.00421 |       0.75816
Evaluating losses...
     -0.01192 |       0.00000 |       0.00139 |       0.00438 |     

     -0.00877 |       0.00000 |       0.00253 |       0.00306 |       0.75751
     -0.00841 |       0.00000 |       0.00250 |       0.00328 |       0.75730
Evaluating losses...
     -0.00867 |       0.00000 |       0.00245 |       0.00381 |       0.75648
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.27         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4016         |
| TimeElapsed     | 1.46e+04     |
| TimestepsSoFar  | 10027008     |
| ev_tdlam_before | 0.595        |
| loss_ent        | 0.7564794    |
| loss_kl         | 0.0038084483 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008674188 |
| loss_vf_loss    | 0.0024477283 |
----------------------------------
********** Iteration 2448 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     5.00e-05 |       0.00000 |       0.00014 |       0.00104 |       0.74540
     -0.00522 |       0.00000 |      8.49e-05 

********** Iteration 2453 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.86e-05 |       0.00000 |      8.07e-05 |       0.00125 |       0.77240
     -0.00694 |       0.00000 |      5.85e-05 |       0.00215 |       0.77241
     -0.00946 |       0.00000 |      5.13e-05 |       0.00233 |       0.77416
     -0.01089 |       0.00000 |      4.65e-05 |       0.00274 |       0.77454
     -0.01269 |       0.00000 |      4.26e-05 |       0.00287 |       0.77588
     -0.01403 |       0.00000 |      3.97e-05 |       0.00355 |       0.77528
     -0.01458 |       0.00000 |      3.80e-05 |       0.00374 |       0.77458
     -0.01538 |       0.00000 |      3.56e-05 |       0.00383 |       0.77560
     -0.01623 |       0.00000 |      3.44e-05 |       0.00417 |       0.77580
     -0.01718 |       0.00000 |      3.23e-05 |       0.00437 |       0.77607
Evaluating losses...
     -0.01837 |       0.00000 |      3.16e-05 |       0.00425 |     

     -0.00926 |       0.00000 |       0.00266 |       0.00300 |       0.74809
     -0.00920 |       0.00000 |       0.00255 |       0.00337 |       0.74957
     -0.01007 |       0.00000 |       0.00245 |       0.00327 |       0.74797
Evaluating losses...
     -0.01022 |       0.00000 |       0.00234 |       0.00348 |       0.74892
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.28         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4031         |
| TimeElapsed     | 1.46e+04     |
| TimestepsSoFar  | 10072064     |
| ev_tdlam_before | 0.145        |
| loss_ent        | 0.7489167    |
| loss_kl         | 0.0034844393 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010217485 |
| loss_vf_loss    | 0.0023426681 |
----------------------------------
********** Iteration 2459 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00182 |       0.00000 |       0.00025 

********** Iteration 2464 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00130 |       0.00000 |      2.58e-05 |       0.00088 |       0.78810
     -0.00365 |       0.00000 |      1.85e-05 |       0.00118 |       0.78843
     -0.00561 |       0.00000 |      1.57e-05 |       0.00165 |       0.78802
     -0.00681 |       0.00000 |      1.40e-05 |       0.00237 |       0.78546
     -0.00720 |       0.00000 |      1.25e-05 |       0.00233 |       0.78915
     -0.00823 |       0.00000 |      1.20e-05 |       0.00244 |       0.78724
     -0.00945 |       0.00000 |      1.11e-05 |       0.00258 |       0.78776
     -0.00949 |       0.00000 |      1.09e-05 |       0.00292 |       0.78856
     -0.01052 |       0.00000 |      1.04e-05 |       0.00315 |       0.78665
     -0.01065 |       0.00000 |      1.01e-05 |       0.00333 |       0.78889
Evaluating losses...
     -0.01153 |       0.00000 |      9.52e-06 |       0.00384 |     

     -0.00741 |       0.00000 |       0.00146 |       0.00502 |       0.80006
     -0.00778 |       0.00000 |       0.00141 |       0.00533 |       0.79899
     -0.00826 |       0.00000 |       0.00140 |       0.00544 |       0.79903
Evaluating losses...
     -0.00874 |       0.00000 |       0.00135 |       0.00559 |       0.79946
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.24         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4046         |
| TimeElapsed     | 1.47e+04     |
| TimestepsSoFar  | 10117120     |
| ev_tdlam_before | 0.31         |
| loss_ent        | 0.79945755   |
| loss_kl         | 0.005594476  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008743005 |
| loss_vf_loss    | 0.0013455197 |
----------------------------------
********** Iteration 2470 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     4.05e-05 |       0.00000 |       0.00042 

********** Iteration 2475 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00093 |       0.00000 |       0.00169 |       0.00926 |       0.79501
     -0.00316 |       0.00000 |       0.00157 |       0.00605 |       0.79428
     -0.00408 |       0.00000 |       0.00159 |       0.00478 |       0.79614
     -0.00495 |       0.00000 |       0.00151 |       0.00395 |       0.79660
     -0.00484 |       0.00000 |       0.00149 |       0.00341 |       0.79679
     -0.00562 |       0.00000 |       0.00151 |       0.00303 |       0.79786
     -0.00607 |       0.00000 |       0.00150 |       0.00273 |       0.79755
     -0.00574 |       0.00000 |       0.00154 |       0.00271 |       0.79910
     -0.00632 |       0.00000 |       0.00149 |       0.00370 |       0.79772
     -0.00614 |       0.00000 |       0.00149 |       0.00299 |       0.79958
Evaluating losses...
     -0.00719 |       0.00000 |       0.00147 |       0.00300 |     

     -0.00594 |       0.00000 |       0.00029 |       0.00225 |       0.79829
     -0.00555 |       0.00000 |       0.00029 |       0.00206 |       0.79650
     -0.00613 |       0.00000 |       0.00028 |       0.00212 |       0.79772
Evaluating losses...
     -0.00644 |       0.00000 |       0.00028 |       0.00231 |       0.79710
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.21          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 4061          |
| TimeElapsed     | 1.47e+04      |
| TimestepsSoFar  | 10162176      |
| ev_tdlam_before | 0.0886        |
| loss_ent        | 0.7971035     |
| loss_kl         | 0.0023089712  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0064445846 |
| loss_vf_loss    | 0.00027846382 |
-----------------------------------
********** Iteration 2481 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00145 |       0.00000 | 

********** Iteration 2486 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     9.19e-05 |       0.00000 |       0.00348 |       0.00243 |       0.80337
     -0.00533 |       0.00000 |       0.00322 |       0.00337 |       0.80528
     -0.00670 |       0.00000 |       0.00308 |       0.00278 |       0.80373
     -0.00784 |       0.00000 |       0.00297 |       0.00254 |       0.80465
     -0.00867 |       0.00000 |       0.00288 |       0.00279 |       0.80429
     -0.00944 |       0.00000 |       0.00277 |       0.00292 |       0.80463
     -0.00993 |       0.00000 |       0.00274 |       0.00311 |       0.80424
     -0.00998 |       0.00000 |       0.00272 |       0.00297 |       0.80455
     -0.01055 |       0.00000 |       0.00267 |       0.00336 |       0.80432
     -0.01072 |       0.00000 |       0.00262 |       0.00353 |       0.80441
Evaluating losses...
     -0.01196 |       0.00000 |       0.00255 |       0.00365 |     

     -0.00632 |       0.00000 |       0.00169 |       0.00417 |       0.77947
     -0.00658 |       0.00000 |       0.00166 |       0.00454 |       0.77952
     -0.00712 |       0.00000 |       0.00163 |       0.00450 |       0.77892
Evaluating losses...
     -0.00725 |       0.00000 |       0.00160 |       0.00456 |       0.77749
---------------------------------
| EpLenMean       | 3.01e+03    |
| EpRewMean       | 0.13        |
| EpThisIter      | 2           |
| EpisodesSoFar   | 4076        |
| TimeElapsed     | 1.47e+04    |
| TimestepsSoFar  | 10207232    |
| ev_tdlam_before | 0.369       |
| loss_ent        | 0.77748835  |
| loss_kl         | 0.004564744 |
| loss_pol_entpen | 0.0         |
| loss_pol_surr   | -0.00725495 |
| loss_vf_loss    | 0.001597459 |
---------------------------------
********** Iteration 2492 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00028 |       0.00000 |       0.00154 |       0.0005

********** Iteration 2497 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00111 |       0.00000 |       0.00300 |       0.00169 |       0.80092
     -0.00414 |       0.00000 |       0.00247 |       0.00233 |       0.79965
     -0.00600 |       0.00000 |       0.00238 |       0.00287 |       0.80342
     -0.00648 |       0.00000 |       0.00226 |       0.00369 |       0.80170
     -0.00716 |       0.00000 |       0.00219 |       0.00348 |       0.80282
     -0.00758 |       0.00000 |       0.00216 |       0.00469 |       0.80587
     -0.00821 |       0.00000 |       0.00214 |       0.00340 |       0.80179
     -0.00833 |       0.00000 |       0.00211 |       0.00370 |       0.80401
     -0.00856 |       0.00000 |       0.00206 |       0.00371 |       0.80295
     -0.00882 |       0.00000 |       0.00204 |       0.00406 |       0.80446
Evaluating losses...
     -0.00932 |       0.00000 |       0.00196 |       0.00372 |     

     -0.00842 |       0.00000 |       0.00175 |       0.00257 |       0.80144
     -0.00929 |       0.00000 |       0.00173 |       0.00279 |       0.80138
     -0.00898 |       0.00000 |       0.00164 |       0.00348 |       0.80368
     -0.00897 |       0.00000 |       0.00159 |       0.00361 |       0.80315
Evaluating losses...
     -0.01006 |       0.00000 |       0.00153 |       0.00358 |       0.80223
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.08         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4090         |
| TimeElapsed     | 1.5e+04      |
| TimestepsSoFar  | 10252288     |
| ev_tdlam_before | 0.518        |
| loss_ent        | 0.8022263    |
| loss_kl         | 0.0035785655 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010063452 |
| loss_vf_loss    | 0.0015257753 |
----------------------------------
********** Iteration 2503 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 2508 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00046 |       0.00000 |       0.00362 |       0.00156 |       0.80783
     -0.00574 |       0.00000 |       0.00316 |       0.00173 |       0.80529
     -0.00686 |       0.00000 |       0.00292 |       0.00241 |       0.80529
     -0.00698 |       0.00000 |       0.00274 |       0.00248 |       0.80545
     -0.00793 |       0.00000 |       0.00263 |       0.00269 |       0.80499
     -0.00862 |       0.00000 |       0.00252 |       0.00315 |       0.80646
     -0.00871 |       0.00000 |       0.00244 |       0.00379 |       0.80672
     -0.00895 |       0.00000 |       0.00241 |       0.00399 |       0.80540
     -0.00945 |       0.00000 |       0.00232 |       0.00478 |       0.80661
     -0.00975 |       0.00000 |       0.00228 |       0.00420 |       0.80743
Evaluating losses...
     -0.01046 |       0.00000 |       0.00215 |       0.00447 |     

     -0.01017 |       0.00000 |       0.00237 |       0.00333 |       0.80135
     -0.01046 |       0.00000 |       0.00232 |       0.00344 |       0.80121
     -0.01048 |       0.00000 |       0.00228 |       0.00371 |       0.80165
Evaluating losses...
     -0.01095 |       0.00000 |       0.00217 |       0.00450 |       0.80332
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.06         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4105         |
| TimeElapsed     | 1.5e+04      |
| TimestepsSoFar  | 10297344     |
| ev_tdlam_before | 0.429        |
| loss_ent        | 0.8033233    |
| loss_kl         | 0.0044999565 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010948111 |
| loss_vf_loss    | 0.0021693865 |
----------------------------------
********** Iteration 2514 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00147 |       0.00000 |       0.00236 

********** Iteration 2519 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     3.27e-05 |       0.00000 |       0.00095 |       0.00052 |       0.81244
     -0.00168 |       0.00000 |       0.00050 |       0.00110 |       0.81285
     -0.00229 |       0.00000 |       0.00044 |       0.00135 |       0.81212
     -0.00268 |       0.00000 |       0.00042 |       0.00153 |       0.81235
     -0.00288 |       0.00000 |       0.00042 |       0.00176 |       0.81207
     -0.00319 |       0.00000 |       0.00040 |       0.00171 |       0.81123
     -0.00342 |       0.00000 |       0.00040 |       0.00195 |       0.81132
     -0.00358 |       0.00000 |       0.00039 |       0.00198 |       0.81184
     -0.00378 |       0.00000 |       0.00039 |       0.00197 |       0.81090
     -0.00396 |       0.00000 |       0.00039 |       0.00212 |       0.81183
Evaluating losses...
     -0.00433 |       0.00000 |       0.00037 |       0.00217 |     

     -0.00868 |       0.00000 |       0.00082 |       0.00309 |       0.80824
     -0.00877 |       0.00000 |       0.00080 |       0.00322 |       0.80917
Evaluating losses...
     -0.00940 |       0.00000 |       0.00076 |       0.00322 |       0.80945
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.04         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4120         |
| TimeElapsed     | 1.5e+04      |
| TimestepsSoFar  | 10342400     |
| ev_tdlam_before | 0.625        |
| loss_ent        | 0.8094488    |
| loss_kl         | 0.0032242918 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.00940294  |
| loss_vf_loss    | 0.0007563742 |
----------------------------------
********** Iteration 2525 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00129 |       0.00000 |       0.00025 |       0.00155 |       0.79112
     -0.00187 |       0.00000 |       0.00017 

********** Iteration 2530 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00074 |       0.00000 |       0.00187 |       0.01081 |       0.81683
     -0.00309 |       0.00000 |       0.00178 |       0.01174 |       0.82262
     -0.00560 |       0.00000 |       0.00174 |       0.00397 |       0.81777
     -0.00634 |       0.00000 |       0.00171 |       0.00356 |       0.81732
     -0.00672 |       0.00000 |       0.00171 |       0.00337 |       0.81832
     -0.00713 |       0.00000 |       0.00167 |       0.00346 |       0.81929
     -0.00729 |       0.00000 |       0.00164 |       0.00330 |       0.81843
     -0.00762 |       0.00000 |       0.00163 |       0.00317 |       0.81745
     -0.00790 |       0.00000 |       0.00161 |       0.00339 |       0.81822
     -0.00801 |       0.00000 |       0.00158 |       0.00349 |       0.81699
Evaluating losses...
     -0.00864 |       0.00000 |       0.00155 |       0.00321 |     

     -0.00810 |       0.00000 |      5.55e-05 |       0.00263 |       0.80032
     -0.00830 |       0.00000 |      5.29e-05 |       0.00320 |       0.79985
     -0.00864 |       0.00000 |      4.89e-05 |       0.00305 |       0.80044
Evaluating losses...
     -0.00934 |       0.00000 |      4.68e-05 |       0.00326 |       0.80044
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.06         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4135         |
| TimeElapsed     | 1.51e+04     |
| TimestepsSoFar  | 10387456     |
| ev_tdlam_before | -2.62        |
| loss_ent        | 0.80043906   |
| loss_kl         | 0.0032595058 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009339042 |
| loss_vf_loss    | 4.681623e-05 |
----------------------------------
********** Iteration 2536 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00110 |       0.00000 |      7.51e-05 

********** Iteration 2541 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00026 |       0.00000 |       0.00166 |       0.00046 |       0.75722
     -0.00341 |       0.00000 |       0.00099 |       0.00127 |       0.75777
     -0.00499 |       0.00000 |       0.00081 |       0.00135 |       0.75479
     -0.00562 |       0.00000 |       0.00070 |       0.00140 |       0.75664
     -0.00573 |       0.00000 |       0.00068 |       0.00164 |       0.75629
     -0.00649 |       0.00000 |       0.00062 |       0.00167 |       0.75721
     -0.00669 |       0.00000 |       0.00058 |       0.00190 |       0.75701
     -0.00718 |       0.00000 |       0.00052 |       0.00225 |       0.75696
     -0.00689 |       0.00000 |       0.00050 |       0.00254 |       0.75719
     -0.00762 |       0.00000 |       0.00048 |       0.00264 |       0.75603
Evaluating losses...
     -0.00765 |       0.00000 |       0.00044 |       0.00278 |     

     -0.00652 |       0.00000 |       0.00113 |       0.00283 |       0.80369
     -0.00688 |       0.00000 |       0.00109 |       0.00298 |       0.80379
     -0.00685 |       0.00000 |       0.00106 |       0.00312 |       0.80377
Evaluating losses...
     -0.00767 |       0.00000 |       0.00103 |       0.00322 |       0.80418
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.08         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4150         |
| TimeElapsed     | 1.51e+04     |
| TimestepsSoFar  | 10432512     |
| ev_tdlam_before | 0.253        |
| loss_ent        | 0.8041823    |
| loss_kl         | 0.0032169067 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.007669784 |
| loss_vf_loss    | 0.0010320321 |
----------------------------------
********** Iteration 2547 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00031 |       0.00000 |       0.00507 

********** Iteration 2552 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00035 |       0.00000 |      3.22e-05 |       0.00098 |       0.81124
     -0.00544 |       0.00000 |      2.59e-05 |       0.00183 |       0.81296
     -0.00821 |       0.00000 |      2.31e-05 |       0.00219 |       0.81270
     -0.00948 |       0.00000 |      2.17e-05 |       0.00248 |       0.81301
     -0.01109 |       0.00000 |      2.11e-05 |       0.00289 |       0.81320
     -0.01203 |       0.00000 |      2.01e-05 |       0.00287 |       0.81353
     -0.01327 |       0.00000 |      1.90e-05 |       0.00328 |       0.81401
     -0.01390 |       0.00000 |      1.82e-05 |       0.00328 |       0.81469
     -0.01456 |       0.00000 |      1.85e-05 |       0.00389 |       0.81433
     -0.01513 |       0.00000 |      1.71e-05 |       0.00399 |       0.81440
Evaluating losses...
     -0.01697 |       0.00000 |      1.66e-05 |       0.00374 |     

     -0.00738 |       0.00000 |       0.00062 |       0.00345 |       0.77636
     -0.00768 |       0.00000 |       0.00062 |       0.00339 |       0.77617
     -0.00898 |       0.00000 |       0.00061 |       0.00378 |       0.77697
Evaluating losses...
     -0.00936 |       0.00000 |       0.00059 |       0.00355 |       0.77710
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.08          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4165          |
| TimeElapsed     | 1.52e+04      |
| TimestepsSoFar  | 10477568      |
| ev_tdlam_before | 0.212         |
| loss_ent        | 0.7770986     |
| loss_kl         | 0.0035480855  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.00936319   |
| loss_vf_loss    | 0.00058790063 |
-----------------------------------
********** Iteration 2558 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00060 |       0.00000 | 

********** Iteration 2563 ************
Eval num_timesteps=10498048, episode_reward=-0.27 +/- 1.20
Episode length: 2998.08 +/- 19.10
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00359 |       0.00000 |       0.00010 |       0.00165 |       0.79456
     -0.00210 |       0.00000 |      5.05e-05 |       0.00271 |       0.79302
     -0.00322 |       0.00000 |      3.67e-05 |       0.00200 |       0.79392
     -0.00397 |       0.00000 |      3.02e-05 |       0.00191 |       0.79238
     -0.00465 |       0.00000 |      2.64e-05 |       0.00196 |       0.79347
     -0.00454 |       0.00000 |      2.43e-05 |       0.00270 |       0.79442
     -0.00527 |       0.00000 |      2.25e-05 |       0.00228 |       0.79476
     -0.00573 |       0.00000 |      2.12e-05 |       0.00236 |       0.79529
     -0.00604 |       0.00000 |      2.05e-05 |       0.00240 |       0.79371
     -0.00637 |       0.00000 |      1.92e-05 |       0.00276 |       0.79

     -0.00737 |       0.00000 |       0.00257 |       0.00227 |       0.81104
     -0.00826 |       0.00000 |       0.00249 |       0.00241 |       0.81114
     -0.00846 |       0.00000 |       0.00242 |       0.00245 |       0.81194
     -0.00821 |       0.00000 |       0.00240 |       0.00250 |       0.81251
Evaluating losses...
     -0.00909 |       0.00000 |       0.00228 |       0.00281 |       0.81297
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.08         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4180         |
| TimeElapsed     | 1.54e+04     |
| TimestepsSoFar  | 10522624     |
| ev_tdlam_before | 0.366        |
| loss_ent        | 0.8129679    |
| loss_kl         | 0.0028148382 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009089433 |
| loss_vf_loss    | 0.0022839312 |
----------------------------------
********** Iteration 2569 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 2574 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00030 |       0.00000 |       0.00209 |       0.00085 |       0.77241
     -0.00273 |       0.00000 |       0.00159 |       0.00108 |       0.77090
     -0.00317 |       0.00000 |       0.00151 |       0.00122 |       0.77227
     -0.00391 |       0.00000 |       0.00145 |       0.00134 |       0.77077
     -0.00415 |       0.00000 |       0.00141 |       0.00152 |       0.77091
     -0.00465 |       0.00000 |       0.00138 |       0.00150 |       0.77234
     -0.00477 |       0.00000 |       0.00135 |       0.00185 |       0.77264
     -0.00501 |       0.00000 |       0.00131 |       0.00195 |       0.77259
     -0.00530 |       0.00000 |       0.00128 |       0.00218 |       0.77156
     -0.00564 |       0.00000 |       0.00124 |       0.00204 |       0.77241
Evaluating losses...
     -0.00615 |       0.00000 |       0.00119 |       0.00214 |     

     -0.00634 |       0.00000 |       0.00090 |       0.00267 |       0.77043
     -0.00633 |       0.00000 |       0.00086 |       0.00264 |       0.77198
     -0.00667 |       0.00000 |       0.00087 |       0.00275 |       0.77081
Evaluating losses...
     -0.00694 |       0.00000 |       0.00088 |       0.00262 |       0.77032
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.17          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4195          |
| TimeElapsed     | 1.54e+04      |
| TimestepsSoFar  | 10567680      |
| ev_tdlam_before | 0.312         |
| loss_ent        | 0.7703246     |
| loss_kl         | 0.0026213871  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0069361525 |
| loss_vf_loss    | 0.00088173215 |
-----------------------------------
********** Iteration 2580 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00061 |       0.00000 | 

********** Iteration 2585 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00056 |       0.00000 |       0.00294 |       0.00366 |       0.75686
     -0.00251 |       0.00000 |       0.00271 |       0.00355 |       0.75413
     -0.00319 |       0.00000 |       0.00263 |       0.00561 |       0.76001
     -0.00409 |       0.00000 |       0.00259 |       0.00736 |       0.75208
     -0.00669 |       0.00000 |       0.00258 |       0.00385 |       0.75719
     -0.00812 |       0.00000 |       0.00254 |       0.00354 |       0.75592
     -0.00755 |       0.00000 |       0.00251 |       0.00354 |       0.75631
     -0.00805 |       0.00000 |       0.00249 |       0.00422 |       0.75657
     -0.00930 |       0.00000 |       0.00248 |       0.00344 |       0.75848
     -0.00948 |       0.00000 |       0.00245 |       0.00334 |       0.75649
Evaluating losses...
     -0.00977 |       0.00000 |       0.00240 |       0.00403 |     

     -0.00855 |       0.00000 |       0.00211 |       0.00340 |       0.78293
     -0.00882 |       0.00000 |       0.00204 |       0.00429 |       0.78087
     -0.00874 |       0.00000 |       0.00199 |       0.00402 |       0.78299
Evaluating losses...
     -0.00989 |       0.00000 |       0.00188 |       0.00383 |       0.78388
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.09         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4210         |
| TimeElapsed     | 1.55e+04     |
| TimestepsSoFar  | 10612736     |
| ev_tdlam_before | 0.37         |
| loss_ent        | 0.78388387   |
| loss_kl         | 0.003825367  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.00989073  |
| loss_vf_loss    | 0.0018846077 |
----------------------------------
********** Iteration 2591 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00139 |       0.00000 |       0.00288 

********** Iteration 2596 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00149 |       0.00000 |       0.00190 |       0.00181 |       0.76309
     -0.00286 |       0.00000 |       0.00152 |       0.00147 |       0.76152
     -0.00397 |       0.00000 |       0.00138 |       0.00195 |       0.76185
     -0.00363 |       0.00000 |       0.00129 |       0.00251 |       0.76415
     -0.00405 |       0.00000 |       0.00125 |       0.00195 |       0.76284
     -0.00462 |       0.00000 |       0.00123 |       0.00199 |       0.76172
     -0.00478 |       0.00000 |       0.00120 |       0.00213 |       0.76030
     -0.00520 |       0.00000 |       0.00116 |       0.00251 |       0.76197
     -0.00438 |       0.00000 |       0.00114 |       0.00282 |       0.76377
     -0.00468 |       0.00000 |       0.00112 |       0.00320 |       0.76560
Evaluating losses...
     -0.00530 |       0.00000 |       0.00109 |       0.00289 |     

     -0.00997 |       0.00000 |      4.26e-05 |       0.00446 |       0.81747
     -0.01001 |       0.00000 |      3.99e-05 |       0.00473 |       0.81823
Evaluating losses...
     -0.01117 |       0.00000 |      3.86e-05 |       0.00457 |       0.81782
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.03          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4225          |
| TimeElapsed     | 1.55e+04      |
| TimestepsSoFar  | 10657792      |
| ev_tdlam_before | -2.68         |
| loss_ent        | 0.8178189     |
| loss_kl         | 0.0045720097  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.011173394  |
| loss_vf_loss    | 3.8640886e-05 |
-----------------------------------
********** Iteration 2602 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00351 |       0.00000 |       0.00210 |       0.00087 |       0.81762
     -0.00067 |       0.00000 | 

********** Iteration 2607 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00035 |       0.00000 |       0.00033 |       0.00052 |       0.78364
     -0.00396 |       0.00000 |       0.00014 |       0.00150 |       0.78274
     -0.00516 |       0.00000 |       0.00011 |       0.00170 |       0.78296
     -0.00609 |       0.00000 |      9.67e-05 |       0.00187 |       0.78455
     -0.00647 |       0.00000 |      8.66e-05 |       0.00211 |       0.78295
     -0.00700 |       0.00000 |      7.82e-05 |       0.00254 |       0.78245
     -0.00749 |       0.00000 |      7.33e-05 |       0.00252 |       0.78152
     -0.00765 |       0.00000 |      6.83e-05 |       0.00274 |       0.78281
     -0.00799 |       0.00000 |      6.30e-05 |       0.00291 |       0.78183
     -0.00803 |       0.00000 |      5.96e-05 |       0.00316 |       0.78149
Evaluating losses...
     -0.00875 |       0.00000 |      5.75e-05 |       0.00327 |     

     -0.00437 |       0.00000 |       0.00032 |       0.00190 |       0.79046
     -0.00674 |       0.00000 |       0.00032 |       0.00203 |       0.79168
     -0.00570 |       0.00000 |       0.00031 |       0.00217 |       0.79175
     -0.00625 |       0.00000 |       0.00029 |       0.00221 |       0.79243
Evaluating losses...
     -0.00713 |       0.00000 |       0.00028 |       0.00230 |       0.79234
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.06         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4240         |
| TimeElapsed     | 1.56e+04     |
| TimestepsSoFar  | 10702848     |
| ev_tdlam_before | 0.196        |
| loss_ent        | 0.79234      |
| loss_kl         | 0.0022998697 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.007129619 |
| loss_vf_loss    | 0.0002798784 |
----------------------------------
********** Iteration 2613 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 2618 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00133 |       0.00000 |       0.00191 |       0.00121 |       0.80518
     -0.00388 |       0.00000 |       0.00095 |       0.00158 |       0.80762
     -0.00502 |       0.00000 |       0.00088 |       0.00182 |       0.80861
     -0.00628 |       0.00000 |       0.00083 |       0.00203 |       0.81034
     -0.00650 |       0.00000 |       0.00080 |       0.00232 |       0.81132
     -0.00724 |       0.00000 |       0.00076 |       0.00243 |       0.81106
     -0.00787 |       0.00000 |       0.00074 |       0.00242 |       0.81136
     -0.00808 |       0.00000 |       0.00072 |       0.00278 |       0.81170
     -0.00780 |       0.00000 |       0.00070 |       0.00292 |       0.80995
     -0.00822 |       0.00000 |       0.00070 |       0.00284 |       0.81066
Evaluating losses...
     -0.00916 |       0.00000 |       0.00064 |       0.00286 |     

     -0.00747 |       0.00000 |       0.00290 |       0.00421 |       0.81523
     -0.00765 |       0.00000 |       0.00285 |       0.00442 |       0.81532
     -0.00754 |       0.00000 |       0.00281 |       0.00492 |       0.81640
Evaluating losses...
     -0.00818 |       0.00000 |       0.00274 |       0.00456 |       0.81650
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.07         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4255         |
| TimeElapsed     | 1.56e+04     |
| TimestepsSoFar  | 10747904     |
| ev_tdlam_before | 0.409        |
| loss_ent        | 0.81650025   |
| loss_kl         | 0.004561513  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008179451 |
| loss_vf_loss    | 0.0027371752 |
----------------------------------
********** Iteration 2624 ************
Eval num_timesteps=10747904, episode_reward=0.17 +/- 0.72
Episode length: 3000.00 +/- 0.00
Optimizing...
     pol_surr |    pol_entpen |  

********** Iteration 2629 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00204 |       0.00000 |      2.39e-05 |       0.00169 |       0.82060
     -0.00620 |       0.00000 |      1.86e-05 |       0.00164 |       0.81886
     -0.00766 |       0.00000 |      1.58e-05 |       0.00239 |       0.81759
     -0.00904 |       0.00000 |      1.44e-05 |       0.00236 |       0.81664
     -0.01022 |       0.00000 |      1.34e-05 |       0.00254 |       0.81650
     -0.01090 |       0.00000 |      1.27e-05 |       0.00300 |       0.81713
     -0.01127 |       0.00000 |      1.19e-05 |       0.00307 |       0.81569
     -0.01262 |       0.00000 |      1.14e-05 |       0.00304 |       0.81567
     -0.01288 |       0.00000 |      1.12e-05 |       0.00327 |       0.81595
     -0.01402 |       0.00000 |      1.07e-05 |       0.00336 |       0.81553
Evaluating losses...
     -0.01449 |       0.00000 |      1.03e-05 |       0.00335 |     

     -0.00869 |       0.00000 |       0.00255 |       0.00362 |       0.75917
     -0.00930 |       0.00000 |       0.00248 |       0.00402 |       0.75917
     -0.00948 |       0.00000 |       0.00238 |       0.00408 |       0.75854
Evaluating losses...
     -0.00992 |       0.00000 |       0.00230 |       0.00422 |       0.75915
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.02         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4270         |
| TimeElapsed     | 1.59e+04     |
| TimestepsSoFar  | 10792960     |
| ev_tdlam_before | 0.389        |
| loss_ent        | 0.75915056   |
| loss_kl         | 0.004222098  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009920715 |
| loss_vf_loss    | 0.0022964762 |
----------------------------------
********** Iteration 2635 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     9.41e-05 |       0.00000 |       0.00380 

********** Iteration 2640 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00125 |       0.00000 |       0.00258 |       0.00083 |       0.82771
     -0.00415 |       0.00000 |       0.00223 |       0.00150 |       0.82610
     -0.00638 |       0.00000 |       0.00198 |       0.00177 |       0.82561
     -0.00843 |       0.00000 |       0.00182 |       0.00202 |       0.82466
     -0.00894 |       0.00000 |       0.00170 |       0.00253 |       0.82375
     -0.01005 |       0.00000 |       0.00159 |       0.00292 |       0.82219
     -0.01053 |       0.00000 |       0.00150 |       0.00311 |       0.82374
     -0.01163 |       0.00000 |       0.00142 |       0.00299 |       0.82265
     -0.01205 |       0.00000 |       0.00135 |       0.00333 |       0.82271
     -0.01265 |       0.00000 |       0.00129 |       0.00333 |       0.82225
Evaluating losses...
     -0.01381 |       0.00000 |       0.00124 |       0.00345 |     

     -0.00708 |       0.00000 |      9.48e-05 |       0.00233 |       0.82054
     -0.00817 |       0.00000 |      9.29e-05 |       0.00253 |       0.82145
     -0.00779 |       0.00000 |      9.28e-05 |       0.00254 |       0.82031
Evaluating losses...
     -0.00826 |       0.00000 |      9.02e-05 |       0.00263 |       0.81960
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.19         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4285         |
| TimeElapsed     | 1.59e+04     |
| TimestepsSoFar  | 10838016     |
| ev_tdlam_before | -2.19        |
| loss_ent        | 0.81960297   |
| loss_kl         | 0.0026334256 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.00826232  |
| loss_vf_loss    | 9.018177e-05 |
----------------------------------
********** Iteration 2646 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00012 |       0.00000 |      9.63e-05 

********** Iteration 2651 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00040 |       0.00000 |       0.00047 |       0.00157 |       0.78127
     -0.00519 |       0.00000 |       0.00028 |       0.00240 |       0.78145
     -0.00697 |       0.00000 |       0.00022 |       0.00231 |       0.78205
     -0.00865 |       0.00000 |       0.00019 |       0.00249 |       0.78131
     -0.00946 |       0.00000 |       0.00017 |       0.00285 |       0.78327
     -0.01013 |       0.00000 |       0.00015 |       0.00306 |       0.78111
     -0.01067 |       0.00000 |       0.00015 |       0.00300 |       0.78222
     -0.01121 |       0.00000 |       0.00014 |       0.00323 |       0.78235
     -0.01162 |       0.00000 |       0.00013 |       0.00354 |       0.78205
     -0.01162 |       0.00000 |       0.00012 |       0.00371 |       0.78055
Evaluating losses...
     -0.01238 |       0.00000 |       0.00012 |       0.00412 |     

     -0.00929 |       0.00000 |       0.00110 |       0.00290 |       0.79142
     -0.00966 |       0.00000 |       0.00108 |       0.00299 |       0.79102
     -0.00975 |       0.00000 |       0.00105 |       0.00324 |       0.79042
Evaluating losses...
     -0.01072 |       0.00000 |       0.00099 |       0.00335 |       0.79035
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.15         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4300         |
| TimeElapsed     | 1.6e+04      |
| TimestepsSoFar  | 10883072     |
| ev_tdlam_before | 0.523        |
| loss_ent        | 0.7903463    |
| loss_kl         | 0.0033544335 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010716838 |
| loss_vf_loss    | 0.0009852938 |
----------------------------------
********** Iteration 2657 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00144 |       0.00000 |       0.00258 

********** Iteration 2662 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00070 |       0.00000 |       0.00206 |       0.00377 |       0.79640
     -0.00253 |       0.00000 |       0.00182 |       0.00690 |       0.79619
     -0.00422 |       0.00000 |       0.00169 |       0.00491 |       0.79604
     -0.00559 |       0.00000 |       0.00163 |       0.00376 |       0.79669
     -0.00616 |       0.00000 |       0.00161 |       0.00242 |       0.79802
     -0.00689 |       0.00000 |       0.00147 |       0.00225 |       0.79862
     -0.00737 |       0.00000 |       0.00134 |       0.00274 |       0.79812
     -0.00770 |       0.00000 |       0.00125 |       0.00259 |       0.79874
     -0.00765 |       0.00000 |       0.00120 |       0.00332 |       0.79672
     -0.00807 |       0.00000 |       0.00119 |       0.00300 |       0.79614
Evaluating losses...
     -0.00851 |       0.00000 |       0.00111 |       0.00281 |     

     -0.00484 |       0.00000 |       0.00057 |       0.00213 |       0.82395
     -0.00452 |       0.00000 |       0.00056 |       0.00214 |       0.82482
     -0.00480 |       0.00000 |       0.00056 |       0.00232 |       0.82536
Evaluating losses...
     -0.00570 |       0.00000 |       0.00052 |       0.00229 |       0.82506
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.19          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4315          |
| TimeElapsed     | 1.6e+04       |
| TimestepsSoFar  | 10928128      |
| ev_tdlam_before | 0.25          |
| loss_ent        | 0.8250636     |
| loss_kl         | 0.0022863904  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0056996085 |
| loss_vf_loss    | 0.0005223273  |
-----------------------------------
********** Iteration 2668 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00100 |       0.00000 | 

********** Iteration 2673 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00186 |       0.00000 |       0.00155 |       0.00157 |       0.75952
     -0.00235 |       0.00000 |       0.00106 |       0.00111 |       0.76196
     -0.00342 |       0.00000 |       0.00098 |       0.00138 |       0.76292
     -0.00470 |       0.00000 |       0.00095 |       0.00157 |       0.76030
     -0.00438 |       0.00000 |       0.00093 |       0.00201 |       0.75880
     -0.00564 |       0.00000 |       0.00089 |       0.00243 |       0.76094
     -0.00588 |       0.00000 |       0.00086 |       0.00252 |       0.76082
     -0.00633 |       0.00000 |       0.00084 |       0.00286 |       0.75953
     -0.00673 |       0.00000 |       0.00084 |       0.00297 |       0.75968
     -0.00687 |       0.00000 |       0.00084 |       0.00305 |       0.76006
Evaluating losses...
     -0.00726 |       0.00000 |       0.00081 |       0.00311 |     

     -0.00541 |       0.00000 |       0.00149 |       0.00269 |       0.79096
     -0.00615 |       0.00000 |       0.00149 |       0.00265 |       0.78975
     -0.00626 |       0.00000 |       0.00148 |       0.00290 |       0.79019
Evaluating losses...
     -0.00579 |       0.00000 |       0.00147 |       0.00343 |       0.79159
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.27         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4330         |
| TimeElapsed     | 1.61e+04     |
| TimestepsSoFar  | 10973184     |
| ev_tdlam_before | 0.524        |
| loss_ent        | 0.7915933    |
| loss_kl         | 0.0034336948 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.00579272  |
| loss_vf_loss    | 0.0014715231 |
----------------------------------
********** Iteration 2679 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00741 |       0.00000 |       0.00146 

********** Iteration 2684 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00045 |       0.00000 |       0.00384 |       0.00114 |       0.75664
     -0.00273 |       0.00000 |       0.00276 |       0.00176 |       0.75900
     -0.00514 |       0.00000 |       0.00253 |       0.00152 |       0.75811
     -0.00484 |       0.00000 |       0.00237 |       0.00219 |       0.75768
     -0.00604 |       0.00000 |       0.00227 |       0.00220 |       0.75870
     -0.00645 |       0.00000 |       0.00218 |       0.00295 |       0.75882
     -0.00660 |       0.00000 |       0.00213 |       0.00299 |       0.75883
     -0.00728 |       0.00000 |       0.00208 |       0.00259 |       0.75880
     -0.00699 |       0.00000 |       0.00197 |       0.00302 |       0.75910
     -0.00761 |       0.00000 |       0.00189 |       0.00308 |       0.75795
Evaluating losses...
     -0.00841 |       0.00000 |       0.00181 |       0.00284 |     

     -0.00697 |       0.00000 |       0.00143 |       0.00322 |       0.76631
     -0.00758 |       0.00000 |       0.00140 |       0.00309 |       0.76538
     -0.00797 |       0.00000 |       0.00138 |       0.00306 |       0.76718
Evaluating losses...
     -0.00812 |       0.00000 |       0.00133 |       0.00323 |       0.76665
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.18         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4345         |
| TimeElapsed     | 1.63e+04     |
| TimestepsSoFar  | 11018240     |
| ev_tdlam_before | 0.731        |
| loss_ent        | 0.7666539    |
| loss_kl         | 0.0032318984 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008116723 |
| loss_vf_loss    | 0.0013296797 |
----------------------------------
********** Iteration 2690 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00091 |       0.00000 |       0.00323 

********** Iteration 2695 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00036 |       0.00000 |       0.00066 |       0.00142 |       0.77426
     -0.00280 |       0.00000 |       0.00043 |       0.00146 |       0.77294
     -0.00443 |       0.00000 |       0.00035 |       0.00140 |       0.77503
     -0.00544 |       0.00000 |       0.00028 |       0.00162 |       0.77525
     -0.00562 |       0.00000 |       0.00023 |       0.00196 |       0.77390
     -0.00682 |       0.00000 |       0.00021 |       0.00179 |       0.77630
     -0.00627 |       0.00000 |       0.00019 |       0.00208 |       0.77444
     -0.00734 |       0.00000 |       0.00017 |       0.00232 |       0.77421
     -0.00723 |       0.00000 |       0.00016 |       0.00254 |       0.77615
     -0.00787 |       0.00000 |       0.00016 |       0.00282 |       0.77421
Evaluating losses...
     -0.00858 |       0.00000 |       0.00015 |       0.00283 |     

     -0.00744 |       0.00000 |      3.21e-05 |       0.00255 |       0.80358
     -0.00778 |       0.00000 |      3.03e-05 |       0.00287 |       0.80418
     -0.00730 |       0.00000 |      2.96e-05 |       0.00355 |       0.80434
Evaluating losses...
     -0.00824 |       0.00000 |      2.86e-05 |       0.00350 |       0.80480
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.18         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4360         |
| TimeElapsed     | 1.63e+04     |
| TimestepsSoFar  | 11063296     |
| ev_tdlam_before | -6.24        |
| loss_ent        | 0.80480456   |
| loss_kl         | 0.0035006087 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.00824377  |
| loss_vf_loss    | 2.862576e-05 |
----------------------------------
********** Iteration 2701 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00211 |       0.00000 |      5.04e-05 

********** Iteration 2706 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00200 |       0.00000 |       0.00113 |       0.00145 |       0.77388
     -0.00160 |       0.00000 |       0.00087 |       0.00104 |       0.77314
     -0.00332 |       0.00000 |       0.00078 |       0.00122 |       0.77050
     -0.00351 |       0.00000 |       0.00073 |       0.00142 |       0.76949
     -0.00328 |       0.00000 |       0.00070 |       0.00142 |       0.77289
     -0.00460 |       0.00000 |       0.00068 |       0.00180 |       0.77108
     -0.00452 |       0.00000 |       0.00067 |       0.00169 |       0.77390
     -0.00482 |       0.00000 |       0.00067 |       0.00182 |       0.77411
     -0.00514 |       0.00000 |       0.00064 |       0.00213 |       0.77425
     -0.00547 |       0.00000 |       0.00063 |       0.00225 |       0.77484
Evaluating losses...
     -0.00604 |       0.00000 |       0.00061 |       0.00222 |     

     -0.00906 |       0.00000 |      6.39e-06 |       0.00381 |       0.80254
     -0.01004 |       0.00000 |      6.39e-06 |       0.00359 |       0.80346
     -0.01051 |       0.00000 |      5.72e-06 |       0.00384 |       0.80381
Evaluating losses...
     -0.01145 |       0.00000 |      5.44e-06 |       0.00374 |       0.80384
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.07         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4375         |
| TimeElapsed     | 1.63e+04     |
| TimestepsSoFar  | 11108352     |
| ev_tdlam_before | -2.61        |
| loss_ent        | 0.80384064   |
| loss_kl         | 0.003739928  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.011445532 |
| loss_vf_loss    | 5.442257e-06 |
----------------------------------
********** Iteration 2712 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00237 |       0.00000 |       0.00134 

********** Iteration 2717 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00122 |       0.00000 |       0.00021 |       0.00146 |       0.79410
     -0.00315 |       0.00000 |       0.00012 |       0.00128 |       0.79591
     -0.00527 |       0.00000 |       0.00010 |       0.00173 |       0.79677
     -0.00659 |       0.00000 |      9.28e-05 |       0.00208 |       0.79624
     -0.00776 |       0.00000 |      8.57e-05 |       0.00261 |       0.79756
     -0.00782 |       0.00000 |      8.16e-05 |       0.00275 |       0.79698
     -0.00901 |       0.00000 |      7.91e-05 |       0.00288 |       0.79820
     -0.00934 |       0.00000 |      7.39e-05 |       0.00324 |       0.79782
     -0.00996 |       0.00000 |      7.15e-05 |       0.00308 |       0.79713
     -0.01025 |       0.00000 |      6.92e-05 |       0.00345 |       0.79755
Evaluating losses...
     -0.01132 |       0.00000 |      6.68e-05 |       0.00384 |     

     -0.01031 |       0.00000 |       0.00262 |       0.00505 |       0.80434
     -0.01060 |       0.00000 |       0.00258 |       0.00547 |       0.80350
     -0.01070 |       0.00000 |       0.00252 |       0.00545 |       0.80493
Evaluating losses...
     -0.01118 |       0.00000 |       0.00240 |       0.00538 |       0.80450
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.07          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 4390          |
| TimeElapsed     | 1.64e+04      |
| TimestepsSoFar  | 11153408      |
| ev_tdlam_before | 0.517         |
| loss_ent        | 0.80450165    |
| loss_kl         | 0.0053770887  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0111796325 |
| loss_vf_loss    | 0.0024017056  |
-----------------------------------
********** Iteration 2723 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00018 |       0.00000 | 

********** Iteration 2728 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00213 |       0.00000 |       0.00030 |       0.00145 |       0.80737
     -0.00248 |       0.00000 |       0.00018 |       0.00137 |       0.80265
     -0.00509 |       0.00000 |       0.00015 |       0.00198 |       0.80129
     -0.00649 |       0.00000 |       0.00013 |       0.00186 |       0.80258
     -0.00779 |       0.00000 |       0.00012 |       0.00206 |       0.80180
     -0.00830 |       0.00000 |       0.00011 |       0.00205 |       0.80298
     -0.00906 |       0.00000 |       0.00011 |       0.00236 |       0.80227
     -0.00908 |       0.00000 |       0.00010 |       0.00271 |       0.80263
     -0.00964 |       0.00000 |      9.57e-05 |       0.00283 |       0.80280
     -0.00944 |       0.00000 |      9.25e-05 |       0.00364 |       0.80415
Evaluating losses...
     -0.01021 |       0.00000 |      8.85e-05 |       0.00419 |     

     -0.01079 |       0.00000 |      2.91e-05 |       0.00367 |       0.79994
     -0.01168 |       0.00000 |      2.77e-05 |       0.00349 |       0.79900
     -0.01182 |       0.00000 |      2.61e-05 |       0.00371 |       0.79987
Evaluating losses...
     -0.01325 |       0.00000 |      2.56e-05 |       0.00416 |       0.79913
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.14          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 4405          |
| TimeElapsed     | 1.64e+04      |
| TimestepsSoFar  | 11198464      |
| ev_tdlam_before | -2.32         |
| loss_ent        | 0.7991286     |
| loss_kl         | 0.0041561313  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.01325168   |
| loss_vf_loss    | 2.5550058e-05 |
-----------------------------------
********** Iteration 2734 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00039 |       0.00000 | 

********** Iteration 2739 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00091 |       0.00000 |       0.00146 |       0.00107 |       0.81289
     -0.00194 |       0.00000 |       0.00117 |       0.00158 |       0.80981
     -0.00403 |       0.00000 |       0.00112 |       0.00178 |       0.81000
     -0.00483 |       0.00000 |       0.00110 |       0.00247 |       0.80824
     -0.00569 |       0.00000 |       0.00112 |       0.00257 |       0.80746
     -0.00576 |       0.00000 |       0.00108 |       0.00246 |       0.80851
     -0.00621 |       0.00000 |       0.00106 |       0.00257 |       0.81061
     -0.00647 |       0.00000 |       0.00106 |       0.00290 |       0.80875
     -0.00616 |       0.00000 |       0.00105 |       0.00309 |       0.80910
     -0.00676 |       0.00000 |       0.00103 |       0.00311 |       0.80834
Evaluating losses...
     -0.00730 |       0.00000 |       0.00100 |       0.00305 |     

     -0.00606 |       0.00000 |       0.00127 |       0.00181 |       0.80691
     -0.00604 |       0.00000 |       0.00126 |       0.00203 |       0.80703
     -0.00612 |       0.00000 |       0.00126 |       0.00212 |       0.80643
Evaluating losses...
     -0.00639 |       0.00000 |       0.00121 |       0.00219 |       0.80578
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.1          |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4420         |
| TimeElapsed     | 1.65e+04     |
| TimestepsSoFar  | 11243520     |
| ev_tdlam_before | 0.215        |
| loss_ent        | 0.8057754    |
| loss_kl         | 0.002186911  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.006393263 |
| loss_vf_loss    | 0.0012133523 |
----------------------------------
********** Iteration 2745 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00066 |       0.00000 |      7.79e-05 

********** Iteration 2750 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     9.54e-05 |       0.00000 |       0.00168 |       0.00112 |       0.80488
     -0.00343 |       0.00000 |       0.00111 |       0.00177 |       0.80278
     -0.00529 |       0.00000 |       0.00094 |       0.00184 |       0.80213
     -0.00603 |       0.00000 |       0.00088 |       0.00205 |       0.80134
     -0.00655 |       0.00000 |       0.00080 |       0.00210 |       0.80257
     -0.00738 |       0.00000 |       0.00076 |       0.00240 |       0.80298
     -0.00762 |       0.00000 |       0.00074 |       0.00260 |       0.80273
     -0.00798 |       0.00000 |       0.00070 |       0.00276 |       0.80344
     -0.00850 |       0.00000 |       0.00067 |       0.00285 |       0.80346
     -0.00851 |       0.00000 |       0.00067 |       0.00309 |       0.80363
Evaluating losses...
     -0.00906 |       0.00000 |       0.00063 |       0.00308 |     

     -0.00804 |       0.00000 |      5.08e-05 |       0.00206 |       0.81827
     -0.00822 |       0.00000 |      4.82e-05 |       0.00222 |       0.81677
     -0.00871 |       0.00000 |      4.65e-05 |       0.00241 |       0.81793
Evaluating losses...
     -0.00953 |       0.00000 |      4.38e-05 |       0.00252 |       0.81902
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.06          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4434          |
| TimeElapsed     | 1.67e+04      |
| TimestepsSoFar  | 11288576      |
| ev_tdlam_before | -0.473        |
| loss_ent        | 0.8190156     |
| loss_kl         | 0.0025216192  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.009531165  |
| loss_vf_loss    | 4.3802844e-05 |
-----------------------------------
********** Iteration 2756 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00062 |       0.00000 | 

********** Iteration 2761 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00022 |       0.00000 |       0.00259 |       0.00027 |       0.77688
     -0.00354 |       0.00000 |       0.00172 |       0.00064 |       0.77609
     -0.00491 |       0.00000 |       0.00103 |       0.00088 |       0.77579
     -0.00504 |       0.00000 |       0.00084 |       0.00130 |       0.77467
     -0.00562 |       0.00000 |       0.00077 |       0.00139 |       0.77546
     -0.00617 |       0.00000 |       0.00075 |       0.00145 |       0.77474
     -0.00674 |       0.00000 |       0.00074 |       0.00163 |       0.77621
     -0.00675 |       0.00000 |       0.00070 |       0.00179 |       0.77553
     -0.00714 |       0.00000 |       0.00071 |       0.00186 |       0.77620
     -0.00712 |       0.00000 |       0.00068 |       0.00203 |       0.77570
Evaluating losses...
     -0.00768 |       0.00000 |       0.00065 |       0.00208 |     

     -0.00659 |       0.00000 |       0.00234 |       0.00194 |       0.78680
     -0.00698 |       0.00000 |       0.00231 |       0.00217 |       0.78631
     -0.00668 |       0.00000 |       0.00230 |       0.00263 |       0.78801
Evaluating losses...
     -0.00761 |       0.00000 |       0.00221 |       0.00308 |       0.78821
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.08          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4449          |
| TimeElapsed     | 1.67e+04      |
| TimestepsSoFar  | 11333632      |
| ev_tdlam_before | 0.555         |
| loss_ent        | 0.78821397    |
| loss_kl         | 0.003082552   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0076054605 |
| loss_vf_loss    | 0.0022098497  |
-----------------------------------
********** Iteration 2767 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.44e-05 |       0.00000 | 

********** Iteration 2772 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00141 |       0.00000 |       0.00181 |       0.00258 |       0.83133
     -0.00136 |       0.00000 |       0.00163 |       0.00263 |       0.82820
     -0.00422 |       0.00000 |       0.00153 |       0.00173 |       0.82905
     -0.00515 |       0.00000 |       0.00149 |       0.00165 |       0.83126
     -0.00483 |       0.00000 |       0.00143 |       0.00223 |       0.83071
     -0.00554 |       0.00000 |       0.00140 |       0.00252 |       0.82904
     -0.00538 |       0.00000 |       0.00136 |       0.00298 |       0.82958
     -0.00559 |       0.00000 |       0.00137 |       0.00504 |       0.83255
     -0.00627 |       0.00000 |       0.00135 |       0.00283 |       0.83060
     -0.00677 |       0.00000 |       0.00132 |       0.00289 |       0.83195
Evaluating losses...
     -0.00739 |       0.00000 |       0.00128 |       0.00263 |     

     -0.00854 |       0.00000 |      6.37e-05 |       0.00237 |       0.82147
     -0.00866 |       0.00000 |      6.26e-05 |       0.00253 |       0.82063
     -0.00911 |       0.00000 |      6.19e-05 |       0.00285 |       0.82103
Evaluating losses...
     -0.01017 |       0.00000 |      5.91e-05 |       0.00286 |       0.81898
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.17          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4464          |
| TimeElapsed     | 1.68e+04      |
| TimestepsSoFar  | 11378688      |
| ev_tdlam_before | -1.38         |
| loss_ent        | 0.81898206    |
| loss_kl         | 0.002858322   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.010173149  |
| loss_vf_loss    | 5.9099784e-05 |
-----------------------------------
********** Iteration 2778 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00041 |       0.00000 | 

********** Iteration 2783 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00012 |       0.00000 |      6.27e-05 |       0.00094 |       0.79302
     -0.00369 |       0.00000 |      4.68e-05 |       0.00168 |       0.79156
     -0.00597 |       0.00000 |      4.00e-05 |       0.00152 |       0.79132
     -0.00726 |       0.00000 |      3.53e-05 |       0.00188 |       0.79060
     -0.00826 |       0.00000 |      3.34e-05 |       0.00186 |       0.79199
     -0.00908 |       0.00000 |      3.05e-05 |       0.00222 |       0.79068
     -0.01006 |       0.00000 |      2.85e-05 |       0.00224 |       0.79318
     -0.01057 |       0.00000 |      2.80e-05 |       0.00246 |       0.79240
     -0.01097 |       0.00000 |      2.65e-05 |       0.00257 |       0.79243
     -0.01148 |       0.00000 |      2.51e-05 |       0.00301 |       0.79154
Evaluating losses...
     -0.01268 |       0.00000 |      2.45e-05 |       0.00285 |     

     -0.00732 |       0.00000 |       0.00119 |       0.00192 |       0.79393
     -0.00742 |       0.00000 |       0.00117 |       0.00208 |       0.79341
     -0.00772 |       0.00000 |       0.00116 |       0.00223 |       0.79413
Evaluating losses...
     -0.00821 |       0.00000 |       0.00111 |       0.00249 |       0.79374
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.16         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4479         |
| TimeElapsed     | 1.68e+04     |
| TimestepsSoFar  | 11423744     |
| ev_tdlam_before | 0.0779       |
| loss_ent        | 0.7937398    |
| loss_kl         | 0.0024932353 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008206017 |
| loss_vf_loss    | 0.0011145443 |
----------------------------------
********** Iteration 2789 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00069 |       0.00000 |       0.00259 

********** Iteration 2794 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00038 |       0.00000 |       0.00237 |       0.00088 |       0.80012
     -0.00287 |       0.00000 |       0.00112 |       0.00193 |       0.80115
     -0.00496 |       0.00000 |       0.00097 |       0.00201 |       0.80087
     -0.00514 |       0.00000 |       0.00090 |       0.00268 |       0.80085
     -0.00607 |       0.00000 |       0.00084 |       0.00224 |       0.79940
     -0.00653 |       0.00000 |       0.00082 |       0.00311 |       0.79926
     -0.00730 |       0.00000 |       0.00079 |       0.00280 |       0.79782
     -0.00756 |       0.00000 |       0.00078 |       0.00302 |       0.79939
     -0.00735 |       0.00000 |       0.00077 |       0.00356 |       0.79884
     -0.00811 |       0.00000 |       0.00075 |       0.00357 |       0.79862
Evaluating losses...
     -0.00849 |       0.00000 |       0.00073 |       0.00343 |     

     -0.01117 |       0.00000 |      2.17e-05 |       0.00272 |       0.78956
     -0.01163 |       0.00000 |      2.06e-05 |       0.00271 |       0.78987
     -0.01180 |       0.00000 |      2.00e-05 |       0.00339 |       0.79022
Evaluating losses...
     -0.01310 |       0.00000 |      1.91e-05 |       0.00294 |       0.79134
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.13          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4494          |
| TimeElapsed     | 1.68e+04      |
| TimestepsSoFar  | 11468800      |
| ev_tdlam_before | -0.916        |
| loss_ent        | 0.79134095    |
| loss_kl         | 0.0029419241  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.013104234  |
| loss_vf_loss    | 1.9110996e-05 |
-----------------------------------
********** Iteration 2800 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00048 |       0.00000 | 

********** Iteration 2805 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00097 |       0.00000 |      1.30e-05 |       0.00078 |       0.79097
     -0.00376 |       0.00000 |      1.17e-05 |       0.00153 |       0.79142
     -0.00636 |       0.00000 |      1.09e-05 |       0.00184 |       0.79350
     -0.00824 |       0.00000 |      1.04e-05 |       0.00177 |       0.79314
     -0.00963 |       0.00000 |      1.00e-05 |       0.00199 |       0.79239
     -0.01041 |       0.00000 |      9.81e-06 |       0.00230 |       0.79286
     -0.01130 |       0.00000 |      9.41e-06 |       0.00234 |       0.79382
     -0.01175 |       0.00000 |      9.24e-06 |       0.00259 |       0.79370
     -0.01218 |       0.00000 |      8.77e-06 |       0.00271 |       0.79348
     -0.01294 |       0.00000 |      8.62e-06 |       0.00296 |       0.79357
Evaluating losses...
     -0.01425 |       0.00000 |      8.34e-06 |       0.00306 |     

     -0.00691 |       0.00000 |       0.00201 |       0.00529 |       0.81518
     -0.00742 |       0.00000 |       0.00200 |       0.00634 |       0.81431
     -0.00766 |       0.00000 |       0.00196 |       0.00572 |       0.81454
     -0.00783 |       0.00000 |       0.00194 |       0.00651 |       0.81560
Evaluating losses...
     -0.00824 |       0.00000 |       0.00187 |       0.00632 |       0.81581
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.05         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4509         |
| TimeElapsed     | 1.7e+04      |
| TimestepsSoFar  | 11513856     |
| ev_tdlam_before | 0.6          |
| loss_ent        | 0.8158093    |
| loss_kl         | 0.0063170507 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008243074 |
| loss_vf_loss    | 0.0018677689 |
----------------------------------
********** Iteration 2811 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 2816 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00049 |       0.00000 |       0.00169 |       0.00234 |       0.81298
     -0.00335 |       0.00000 |       0.00137 |       0.00261 |       0.81472
     -0.00428 |       0.00000 |       0.00122 |       0.00275 |       0.81722
     -0.00513 |       0.00000 |       0.00115 |       0.00262 |       0.81638
     -0.00551 |       0.00000 |       0.00110 |       0.00281 |       0.81743
     -0.00605 |       0.00000 |       0.00101 |       0.00266 |       0.81674
     -0.00593 |       0.00000 |       0.00097 |       0.00249 |       0.81796
     -0.00615 |       0.00000 |       0.00094 |       0.00271 |       0.81804
     -0.00638 |       0.00000 |       0.00091 |       0.00283 |       0.81779
     -0.00653 |       0.00000 |       0.00088 |       0.00300 |       0.81891
Evaluating losses...
     -0.00689 |       0.00000 |       0.00086 |       0.00315 |     

     -0.00990 |       0.00000 |      5.46e-05 |       0.00262 |       0.79922
     -0.01054 |       0.00000 |      5.23e-05 |       0.00276 |       0.79954
     -0.01118 |       0.00000 |      5.10e-05 |       0.00275 |       0.79973
Evaluating losses...
     -0.01196 |       0.00000 |      4.94e-05 |       0.00284 |       0.79989
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.05          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4524          |
| TimeElapsed     | 1.71e+04      |
| TimestepsSoFar  | 11558912      |
| ev_tdlam_before | -0.385        |
| loss_ent        | 0.7998913     |
| loss_kl         | 0.002839912   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.011963706  |
| loss_vf_loss    | 4.9422386e-05 |
-----------------------------------
********** Iteration 2822 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -8.14e-05 |       0.00000 | 

********** Iteration 2827 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00238 |       0.00000 |       0.00434 |       0.00067 |       0.82831
     -0.00307 |       0.00000 |       0.00355 |       0.00209 |       0.82461
     -0.00549 |       0.00000 |       0.00314 |       0.00151 |       0.82594
     -0.00696 |       0.00000 |       0.00288 |       0.00185 |       0.82775
     -0.00755 |       0.00000 |       0.00271 |       0.00172 |       0.82854
     -0.00781 |       0.00000 |       0.00254 |       0.00224 |       0.82879
     -0.00853 |       0.00000 |       0.00242 |       0.00240 |       0.82898
     -0.00899 |       0.00000 |       0.00234 |       0.00241 |       0.82938
     -0.00932 |       0.00000 |       0.00225 |       0.00228 |       0.82885
     -0.00940 |       0.00000 |       0.00217 |       0.00250 |       0.82812
Evaluating losses...
     -0.00991 |       0.00000 |       0.00206 |       0.00256 |     

     -0.00613 |       0.00000 |       0.00109 |       0.00246 |       0.80510
     -0.00604 |       0.00000 |       0.00108 |       0.00400 |       0.80480
     -0.00607 |       0.00000 |       0.00108 |       0.00386 |       0.80316
Evaluating losses...
     -0.00721 |       0.00000 |       0.00106 |       0.00352 |       0.80389
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.11         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4539         |
| TimeElapsed     | 1.71e+04     |
| TimestepsSoFar  | 11603968     |
| ev_tdlam_before | 0.487        |
| loss_ent        | 0.8038876    |
| loss_kl         | 0.0035198512 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.00720916  |
| loss_vf_loss    | 0.0010589598 |
----------------------------------
********** Iteration 2833 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00048 |       0.00000 |      2.94e-05 

********** Iteration 2838 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00029 |       0.00000 |       0.00010 |       0.00089 |       0.81688
     -0.00378 |       0.00000 |      7.29e-05 |       0.00131 |       0.81680
     -0.00534 |       0.00000 |      5.89e-05 |       0.00124 |       0.81630
     -0.00612 |       0.00000 |      5.04e-05 |       0.00151 |       0.81528
     -0.00687 |       0.00000 |      4.30e-05 |       0.00157 |       0.81624
     -0.00750 |       0.00000 |      3.90e-05 |       0.00177 |       0.81413
     -0.00779 |       0.00000 |      3.52e-05 |       0.00200 |       0.81639
     -0.00836 |       0.00000 |      3.27e-05 |       0.00222 |       0.81463
     -0.00860 |       0.00000 |      3.10e-05 |       0.00212 |       0.81540
     -0.00887 |       0.00000 |      2.95e-05 |       0.00244 |       0.81504
Evaluating losses...
     -0.00947 |       0.00000 |      2.74e-05 |       0.00235 |     

     -0.00889 |       0.00000 |       0.00195 |       0.00410 |       0.77981
     -0.00865 |       0.00000 |       0.00191 |       0.00568 |       0.78224
     -0.00915 |       0.00000 |       0.00189 |       0.00283 |       0.77901
Evaluating losses...
     -0.01032 |       0.00000 |       0.00186 |       0.00240 |       0.77958
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.14         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4554         |
| TimeElapsed     | 1.71e+04     |
| TimestepsSoFar  | 11649024     |
| ev_tdlam_before | 0.633        |
| loss_ent        | 0.7795805    |
| loss_kl         | 0.0023976883 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010315636 |
| loss_vf_loss    | 0.0018564765 |
----------------------------------
********** Iteration 2844 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00145 |       0.00000 |       0.00211 

********** Iteration 2849 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     7.03e-05 |       0.00000 |      5.87e-05 |       0.00064 |       0.82881
     -0.00420 |       0.00000 |      4.27e-05 |       0.00148 |       0.82837
     -0.00614 |       0.00000 |      3.73e-05 |       0.00158 |       0.82954
     -0.00765 |       0.00000 |      3.56e-05 |       0.00170 |       0.82796
     -0.00841 |       0.00000 |      3.36e-05 |       0.00167 |       0.82881
     -0.00918 |       0.00000 |      3.14e-05 |       0.00207 |       0.82949
     -0.00975 |       0.00000 |      3.02e-05 |       0.00220 |       0.82936
     -0.01038 |       0.00000 |      2.91e-05 |       0.00213 |       0.82896
     -0.01066 |       0.00000 |      2.82e-05 |       0.00223 |       0.82910
     -0.01143 |       0.00000 |      2.67e-05 |       0.00227 |       0.82885
Evaluating losses...
     -0.01234 |       0.00000 |      2.59e-05 |       0.00240 |     

     -0.01021 |       0.00000 |       0.00013 |       0.00251 |       0.77895
     -0.01050 |       0.00000 |       0.00012 |       0.00243 |       0.77746
     -0.01070 |       0.00000 |       0.00012 |       0.00260 |       0.77769
Evaluating losses...
     -0.01207 |       0.00000 |       0.00012 |       0.00271 |       0.77839
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.13          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4569          |
| TimeElapsed     | 1.72e+04      |
| TimestepsSoFar  | 11694080      |
| ev_tdlam_before | -0.207        |
| loss_ent        | 0.778395      |
| loss_kl         | 0.0027145804  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.012073179  |
| loss_vf_loss    | 0.00012201649 |
-----------------------------------
********** Iteration 2855 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00053 |       0.00000 | 

********** Iteration 2860 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00061 |       0.00000 |       0.00022 |       0.00065 |       0.79591
     -0.00258 |       0.00000 |       0.00014 |       0.00110 |       0.79472
     -0.00372 |       0.00000 |       0.00010 |       0.00116 |       0.79514
     -0.00463 |       0.00000 |      8.62e-05 |       0.00150 |       0.79451
     -0.00522 |       0.00000 |      7.48e-05 |       0.00158 |       0.79453
     -0.00580 |       0.00000 |      6.73e-05 |       0.00167 |       0.79475
     -0.00619 |       0.00000 |      6.41e-05 |       0.00184 |       0.79460
     -0.00658 |       0.00000 |      5.97e-05 |       0.00194 |       0.79516
     -0.00666 |       0.00000 |      5.80e-05 |       0.00216 |       0.79453
     -0.00710 |       0.00000 |      5.55e-05 |       0.00215 |       0.79474
Evaluating losses...
     -0.00787 |       0.00000 |      5.39e-05 |       0.00207 |     

     -0.00579 |       0.00000 |       0.00041 |       0.00195 |       0.78725
     -0.00616 |       0.00000 |       0.00040 |       0.00219 |       0.78838
     -0.00623 |       0.00000 |       0.00040 |       0.00226 |       0.78735
Evaluating losses...
     -0.00669 |       0.00000 |       0.00041 |       0.00230 |       0.78709
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.12          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4584          |
| TimeElapsed     | 1.72e+04      |
| TimestepsSoFar  | 11739136      |
| ev_tdlam_before | 0.689         |
| loss_ent        | 0.78708655    |
| loss_kl         | 0.0023019363  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0066900114 |
| loss_vf_loss    | 0.00040773416 |
-----------------------------------
********** Iteration 2866 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00014 |       0.00000 | 

********** Iteration 2871 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00151 |       0.00000 |       0.00283 |       0.00057 |       0.81070
     -0.00352 |       0.00000 |       0.00238 |       0.00127 |       0.80808
     -0.00445 |       0.00000 |       0.00225 |       0.00131 |       0.81019
     -0.00588 |       0.00000 |       0.00220 |       0.00130 |       0.80970
     -0.00583 |       0.00000 |       0.00216 |       0.00147 |       0.81039
     -0.00617 |       0.00000 |       0.00213 |       0.00172 |       0.81116
     -0.00620 |       0.00000 |       0.00210 |       0.00194 |       0.80996
     -0.00722 |       0.00000 |       0.00208 |       0.00213 |       0.81033
     -0.00693 |       0.00000 |       0.00208 |       0.00224 |       0.80999
     -0.00742 |       0.00000 |       0.00205 |       0.00231 |       0.81015
Evaluating losses...
     -0.00817 |       0.00000 |       0.00204 |       0.00237 |     

     -0.00759 |       0.00000 |       0.00165 |       0.00375 |       0.82843
     -0.00752 |       0.00000 |       0.00162 |       0.00387 |       0.82934
     -0.00798 |       0.00000 |       0.00160 |       0.00418 |       0.82915
Evaluating losses...
     -0.00871 |       0.00000 |       0.00154 |       0.00433 |       0.82903
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.16         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4599         |
| TimeElapsed     | 1.74e+04     |
| TimestepsSoFar  | 11784192     |
| ev_tdlam_before | 0.45         |
| loss_ent        | 0.8290338    |
| loss_kl         | 0.00433102   |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008711104 |
| loss_vf_loss    | 0.0015363927 |
----------------------------------
********** Iteration 2877 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |       0.00000 |       0.00155 

********** Iteration 2882 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00080 |       0.00000 |       0.00037 |       0.00119 |       0.80806
     -0.00320 |       0.00000 |       0.00019 |       0.00146 |       0.80956
     -0.00486 |       0.00000 |       0.00017 |       0.00170 |       0.80934
     -0.00589 |       0.00000 |       0.00015 |       0.00243 |       0.80954
     -0.00670 |       0.00000 |       0.00014 |       0.00230 |       0.81105
     -0.00716 |       0.00000 |       0.00013 |       0.00223 |       0.81045
     -0.00770 |       0.00000 |       0.00013 |       0.00283 |       0.81247
     -0.00764 |       0.00000 |       0.00012 |       0.00304 |       0.80957
     -0.00820 |       0.00000 |       0.00012 |       0.00333 |       0.81219
     -0.00857 |       0.00000 |       0.00011 |       0.00335 |       0.81135
Evaluating losses...
     -0.00924 |       0.00000 |       0.00012 |       0.00367 |     

     -0.00868 |       0.00000 |       0.00100 |       0.00255 |       0.77791
     -0.00954 |       0.00000 |       0.00100 |       0.00239 |       0.77897
Evaluating losses...
     -0.01034 |       0.00000 |       0.00097 |       0.00251 |       0.77956
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.17          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4614          |
| TimeElapsed     | 1.75e+04      |
| TimestepsSoFar  | 11829248      |
| ev_tdlam_before | 0.606         |
| loss_ent        | 0.7795564     |
| loss_kl         | 0.00250753    |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.010343396  |
| loss_vf_loss    | 0.00097222754 |
-----------------------------------
********** Iteration 2888 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00099 |       0.00000 |       0.00342 |       0.00071 |       0.82522
     -0.00378 |       0.00000 | 

********** Iteration 2893 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00170 |       0.00000 |       0.00019 |       0.00052 |       0.82588
     -0.00322 |       0.00000 |       0.00013 |       0.00102 |       0.82350
     -0.00512 |       0.00000 |      9.60e-05 |       0.00159 |       0.82442
     -0.00650 |       0.00000 |      7.93e-05 |       0.00140 |       0.82405
     -0.00727 |       0.00000 |      6.87e-05 |       0.00172 |       0.82487
     -0.00779 |       0.00000 |      6.12e-05 |       0.00175 |       0.82428
     -0.00817 |       0.00000 |      5.66e-05 |       0.00199 |       0.82600
     -0.00888 |       0.00000 |      5.14e-05 |       0.00208 |       0.82600
     -0.00933 |       0.00000 |      4.86e-05 |       0.00214 |       0.82565
     -0.00959 |       0.00000 |      4.56e-05 |       0.00230 |       0.82466
Evaluating losses...
     -0.01063 |       0.00000 |      4.36e-05 |       0.00230 |     

     -0.00641 |       0.00000 |       0.00084 |       0.00199 |       0.79829
     -0.00666 |       0.00000 |       0.00083 |       0.00229 |       0.79896
     -0.00694 |       0.00000 |       0.00081 |       0.00217 |       0.79887
Evaluating losses...
     -0.00744 |       0.00000 |       0.00078 |       0.00252 |       0.80012
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.21          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4629          |
| TimeElapsed     | 1.75e+04      |
| TimestepsSoFar  | 11874304      |
| ev_tdlam_before | 0.571         |
| loss_ent        | 0.8001179     |
| loss_kl         | 0.0025190334  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0074375975 |
| loss_vf_loss    | 0.00078414567 |
-----------------------------------
********** Iteration 2899 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00287 |       0.00000 | 

********** Iteration 2904 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00030 |       0.00000 |       0.00120 |       0.00110 |       0.81289
     -0.00299 |       0.00000 |       0.00048 |       0.00184 |       0.81263
     -0.00397 |       0.00000 |       0.00039 |       0.00196 |       0.81037
     -0.00368 |       0.00000 |       0.00033 |       0.00135 |       0.81103
     -0.00360 |       0.00000 |       0.00032 |       0.00672 |       0.81834
     -0.00494 |       0.00000 |       0.00033 |       0.00264 |       0.81611
     -0.00545 |       0.00000 |       0.00032 |       0.00162 |       0.81268
     -0.00586 |       0.00000 |       0.00031 |       0.00152 |       0.81250
     -0.00606 |       0.00000 |       0.00029 |       0.00157 |       0.81233
     -0.00601 |       0.00000 |       0.00030 |       0.00158 |       0.81192
Evaluating losses...
     -0.00646 |       0.00000 |       0.00027 |       0.00157 |     

     -0.00518 |       0.00000 |       0.00082 |       0.00194 |       0.84442
     -0.00538 |       0.00000 |       0.00080 |       0.00186 |       0.84464
     -0.00556 |       0.00000 |       0.00076 |       0.00178 |       0.84452
     -0.00576 |       0.00000 |       0.00072 |       0.00183 |       0.84497
Evaluating losses...
     -0.00588 |       0.00000 |       0.00069 |       0.00180 |       0.84465
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.18         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4644         |
| TimeElapsed     | 1.75e+04     |
| TimestepsSoFar  | 11919360     |
| ev_tdlam_before | 0.0855       |
| loss_ent        | 0.8446544    |
| loss_kl         | 0.0017956002 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.005884212 |
| loss_vf_loss    | 0.0006856214 |
----------------------------------
********** Iteration 2910 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss 

********** Iteration 2915 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00181 |       0.00000 |       0.00102 |       0.00038 |       0.85289
     -0.00250 |       0.00000 |       0.00069 |       0.00071 |       0.85407
     -0.00294 |       0.00000 |       0.00064 |       0.00071 |       0.85317
     -0.00312 |       0.00000 |       0.00059 |       0.00101 |       0.85301
     -0.00327 |       0.00000 |       0.00060 |       0.00117 |       0.85281
     -0.00408 |       0.00000 |       0.00058 |       0.00124 |       0.85211
     -0.00446 |       0.00000 |       0.00054 |       0.00143 |       0.85287
     -0.00453 |       0.00000 |       0.00056 |       0.00162 |       0.85283
     -0.00471 |       0.00000 |       0.00053 |       0.00168 |       0.85319
     -0.00493 |       0.00000 |       0.00051 |       0.00170 |       0.85351
Evaluating losses...
     -0.00502 |       0.00000 |       0.00048 |       0.00200 |     

     -0.00777 |       0.00000 |       0.00011 |       0.00154 |       0.85651
     -0.00781 |       0.00000 |       0.00010 |       0.00174 |       0.85579
     -0.00826 |       0.00000 |      9.56e-05 |       0.00189 |       0.85525
Evaluating losses...
     -0.00872 |       0.00000 |      9.14e-05 |       0.00181 |       0.85596
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.17          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4659          |
| TimeElapsed     | 1.76e+04      |
| TimestepsSoFar  | 11964416      |
| ev_tdlam_before | -3.82         |
| loss_ent        | 0.8559645     |
| loss_kl         | 0.0018097836  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.008718393  |
| loss_vf_loss    | 9.1387345e-05 |
-----------------------------------
********** Iteration 2921 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00266 |       0.00000 | 

********** Iteration 2926 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00060 |       0.00000 |       0.00148 |       0.00090 |       0.84137
     -0.00281 |       0.00000 |       0.00054 |       0.00093 |       0.83820
     -0.00421 |       0.00000 |       0.00041 |       0.00106 |       0.83757
     -0.00471 |       0.00000 |       0.00034 |       0.00123 |       0.83669
     -0.00530 |       0.00000 |       0.00032 |       0.00149 |       0.83625
     -0.00553 |       0.00000 |       0.00030 |       0.00161 |       0.83782
     -0.00597 |       0.00000 |       0.00030 |       0.00168 |       0.83796
     -0.00639 |       0.00000 |       0.00030 |       0.00185 |       0.83749
     -0.00663 |       0.00000 |       0.00028 |       0.00191 |       0.83801
     -0.00690 |       0.00000 |       0.00028 |       0.00205 |       0.83820
Evaluating losses...
     -0.00719 |       0.00000 |       0.00026 |       0.00214 |     

     -0.00561 |       0.00000 |       0.00180 |       0.00294 |       0.84170
     -0.00566 |       0.00000 |       0.00176 |       0.00304 |       0.84194
     -0.00577 |       0.00000 |       0.00176 |       0.00356 |       0.84350
     -0.00621 |       0.00000 |       0.00176 |       0.00353 |       0.84211
Evaluating losses...
     -0.00652 |       0.00000 |       0.00167 |       0.00346 |       0.84169
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.19          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 4674          |
| TimeElapsed     | 1.78e+04      |
| TimestepsSoFar  | 12009472      |
| ev_tdlam_before | 0.237         |
| loss_ent        | 0.8416933     |
| loss_kl         | 0.0034571637  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0065174713 |
| loss_vf_loss    | 0.0016661854  |
-----------------------------------
********** Iteration 2932 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 2937 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00144 |       0.00000 |       0.00197 |       0.00068 |       0.83757
     -0.00298 |       0.00000 |       0.00146 |       0.00095 |       0.83826
     -0.00454 |       0.00000 |       0.00140 |       0.00105 |       0.83683
     -0.00547 |       0.00000 |       0.00136 |       0.00139 |       0.83579
     -0.00596 |       0.00000 |       0.00138 |       0.00144 |       0.83557
     -0.00671 |       0.00000 |       0.00131 |       0.00150 |       0.83587
     -0.00746 |       0.00000 |       0.00130 |       0.00155 |       0.83636
     -0.00691 |       0.00000 |       0.00129 |       0.00165 |       0.83572
     -0.00734 |       0.00000 |       0.00126 |       0.00181 |       0.83609
     -0.00756 |       0.00000 |       0.00124 |       0.00207 |       0.83698
Evaluating losses...
     -0.00832 |       0.00000 |       0.00123 |       0.00212 |     

     -0.00836 |       0.00000 |       0.00531 |       0.00282 |       0.80639
     -0.00859 |       0.00000 |       0.00526 |       0.00292 |       0.80527
Evaluating losses...
     -0.00927 |       0.00000 |       0.00518 |       0.00295 |       0.80523
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.14         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4689         |
| TimeElapsed     | 1.79e+04     |
| TimestepsSoFar  | 12054528     |
| ev_tdlam_before | 0.497        |
| loss_ent        | 0.8052308    |
| loss_kl         | 0.002951133  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009273433 |
| loss_vf_loss    | 0.0051813964 |
----------------------------------
********** Iteration 2943 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     3.42e-05 |       0.00000 |       0.00505 |       0.00118 |       0.83574
     -0.00359 |       0.00000 |       0.00420 

********** Iteration 2948 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00041 |       0.00000 |       0.00199 |       0.00025 |       0.82014
     -0.00248 |       0.00000 |       0.00171 |       0.00060 |       0.81867
     -0.00282 |       0.00000 |       0.00147 |       0.00081 |       0.82070
     -0.00360 |       0.00000 |       0.00123 |       0.00110 |       0.81971
     -0.00429 |       0.00000 |       0.00114 |       0.00142 |       0.81895
     -0.00453 |       0.00000 |       0.00108 |       0.00144 |       0.82042
     -0.00488 |       0.00000 |       0.00104 |       0.00170 |       0.81949
     -0.00508 |       0.00000 |       0.00101 |       0.00184 |       0.82020
     -0.00531 |       0.00000 |       0.00097 |       0.00192 |       0.82153
     -0.00551 |       0.00000 |       0.00100 |       0.00205 |       0.82052
Evaluating losses...
     -0.00578 |       0.00000 |       0.00093 |       0.00203 |     

     -0.00457 |       0.00000 |       0.00116 |       0.00172 |       0.85225
     -0.00489 |       0.00000 |       0.00114 |       0.00168 |       0.85029
     -0.00471 |       0.00000 |       0.00111 |       0.00183 |       0.85113
Evaluating losses...
     -0.00534 |       0.00000 |       0.00108 |       0.00194 |       0.85205
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.15         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4704         |
| TimeElapsed     | 1.79e+04     |
| TimestepsSoFar  | 12099584     |
| ev_tdlam_before | 0.086        |
| loss_ent        | 0.85205185   |
| loss_kl         | 0.0019390266 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.005337471 |
| loss_vf_loss    | 0.0010809992 |
----------------------------------
********** Iteration 2954 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00108 |       0.00000 |       0.00055 

********** Iteration 2959 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00034 |       0.00000 |       0.00015 |       0.00052 |       0.83373
     -0.00315 |       0.00000 |       0.00011 |       0.00111 |       0.83347
     -0.00526 |       0.00000 |      9.19e-05 |       0.00132 |       0.83176
     -0.00662 |       0.00000 |      8.45e-05 |       0.00130 |       0.83362
     -0.00718 |       0.00000 |      7.83e-05 |       0.00127 |       0.83272
     -0.00790 |       0.00000 |      7.41e-05 |       0.00136 |       0.83187
     -0.00800 |       0.00000 |      7.22e-05 |       0.00155 |       0.83212
     -0.00858 |       0.00000 |      6.96e-05 |       0.00169 |       0.83160
     -0.00904 |       0.00000 |      6.58e-05 |       0.00192 |       0.83086
     -0.00923 |       0.00000 |      6.38e-05 |       0.00204 |       0.83180
Evaluating losses...
     -0.00996 |       0.00000 |      6.20e-05 |       0.00212 |     

     -0.00689 |       0.00000 |      3.09e-05 |       0.00190 |       0.82498
     -0.00747 |       0.00000 |      2.99e-05 |       0.00194 |       0.82478
     -0.00783 |       0.00000 |      2.91e-05 |       0.00214 |       0.82492
Evaluating losses...
     -0.00835 |       0.00000 |      2.85e-05 |       0.00218 |       0.82450
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.09          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 4719          |
| TimeElapsed     | 1.79e+04      |
| TimestepsSoFar  | 12144640      |
| ev_tdlam_before | -1.64         |
| loss_ent        | 0.8244991     |
| loss_kl         | 0.0021778813  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.008348577  |
| loss_vf_loss    | 2.8480377e-05 |
-----------------------------------
********** Iteration 2965 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00045 |       0.00000 | 

********** Iteration 2970 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00096 |       0.00000 |      3.93e-05 |       0.00088 |       0.81550
     -0.00438 |       0.00000 |      2.93e-05 |       0.00106 |       0.81638
     -0.00636 |       0.00000 |      2.56e-05 |       0.00122 |       0.81455
     -0.00771 |       0.00000 |      2.37e-05 |       0.00132 |       0.81432
     -0.00842 |       0.00000 |      2.13e-05 |       0.00158 |       0.81509
     -0.00915 |       0.00000 |      2.02e-05 |       0.00177 |       0.81500
     -0.00964 |       0.00000 |      1.93e-05 |       0.00177 |       0.81341
     -0.00996 |       0.00000 |      1.86e-05 |       0.00192 |       0.81397
     -0.01051 |       0.00000 |      1.81e-05 |       0.00209 |       0.81381
     -0.01086 |       0.00000 |      1.74e-05 |       0.00230 |       0.81405
Evaluating losses...
     -0.01164 |       0.00000 |      1.70e-05 |       0.00236 |     

     -0.00595 |       0.00000 |       0.00062 |       0.00170 |       0.83808
     -0.00659 |       0.00000 |       0.00060 |       0.00183 |       0.83887
     -0.00656 |       0.00000 |       0.00057 |       0.00201 |       0.83984
Evaluating losses...
     -0.00708 |       0.00000 |       0.00055 |       0.00212 |       0.83961
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.07         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4734         |
| TimeElapsed     | 1.8e+04      |
| TimestepsSoFar  | 12189696     |
| ev_tdlam_before | 0.351        |
| loss_ent        | 0.8396097    |
| loss_kl         | 0.002115217  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.007079284 |
| loss_vf_loss    | 0.0005475708 |
----------------------------------
********** Iteration 2976 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00169 |       0.00000 |       0.00216 

********** Iteration 2981 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00170 |       0.00000 |       0.00362 |       0.00055 |       0.82933
     -0.00325 |       0.00000 |       0.00312 |       0.00108 |       0.82997
     -0.00536 |       0.00000 |       0.00279 |       0.00116 |       0.82871
     -0.00652 |       0.00000 |       0.00259 |       0.00133 |       0.82950
     -0.00738 |       0.00000 |       0.00247 |       0.00145 |       0.82935
     -0.00770 |       0.00000 |       0.00235 |       0.00171 |       0.83103
     -0.00771 |       0.00000 |       0.00229 |       0.00195 |       0.83005
     -0.00863 |       0.00000 |       0.00224 |       0.00217 |       0.82876
     -0.00872 |       0.00000 |       0.00218 |       0.00220 |       0.83100
     -0.00921 |       0.00000 |       0.00215 |       0.00233 |       0.83074
Evaluating losses...
     -0.00958 |       0.00000 |       0.00210 |       0.00238 |     

     -0.00277 |       0.00000 |       0.00102 |       0.00664 |       0.79698
     -0.00359 |       0.00000 |       0.00101 |       0.00572 |       0.79807
     -0.00251 |       0.00000 |       0.00101 |       0.00513 |       0.80094
Evaluating losses...
     -0.00458 |       0.00000 |       0.00099 |       0.00523 |       0.80019
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.18          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 4749          |
| TimeElapsed     | 1.8e+04       |
| TimestepsSoFar  | 12234752      |
| ev_tdlam_before | 0.797         |
| loss_ent        | 0.80018824    |
| loss_kl         | 0.0052275443  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0045777904 |
| loss_vf_loss    | 0.0009895848  |
-----------------------------------
********** Iteration 2987 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00119 |       0.00000 | 

********** Iteration 2992 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00027 |       0.00000 |       0.00393 |       0.00063 |       0.82181
     -0.00441 |       0.00000 |       0.00310 |       0.00079 |       0.82192
     -0.00637 |       0.00000 |       0.00285 |       0.00116 |       0.82243
     -0.00729 |       0.00000 |       0.00266 |       0.00111 |       0.82285
     -0.00778 |       0.00000 |       0.00251 |       0.00135 |       0.82292
     -0.00855 |       0.00000 |       0.00236 |       0.00146 |       0.82306
     -0.00882 |       0.00000 |       0.00223 |       0.00165 |       0.82442
     -0.00934 |       0.00000 |       0.00209 |       0.00167 |       0.82385
     -0.01003 |       0.00000 |       0.00205 |       0.00193 |       0.82594
     -0.01030 |       0.00000 |       0.00198 |       0.00202 |       0.82541
Evaluating losses...
     -0.01098 |       0.00000 |       0.00190 |       0.00213 |     

     -0.00484 |       0.00000 |       0.00031 |       0.00098 |       0.82162
     -0.00557 |       0.00000 |       0.00029 |       0.00111 |       0.82224
     -0.00582 |       0.00000 |       0.00029 |       0.00121 |       0.82265
     -0.00588 |       0.00000 |       0.00026 |       0.00126 |       0.82273
Evaluating losses...
     -0.00630 |       0.00000 |       0.00024 |       0.00135 |       0.82189
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.17          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4763          |
| TimeElapsed     | 1.82e+04      |
| TimestepsSoFar  | 12279808      |
| ev_tdlam_before | 0.542         |
| loss_ent        | 0.82188666    |
| loss_kl         | 0.0013476284  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0063040266 |
| loss_vf_loss    | 0.00024164423 |
-----------------------------------
********** Iteration 2998 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3003 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00062 |       0.00000 |       0.00033 |       0.00066 |       0.82929
     -0.00319 |       0.00000 |       0.00016 |       0.00139 |       0.82926
     -0.00465 |       0.00000 |       0.00013 |       0.00123 |       0.82988
     -0.00601 |       0.00000 |       0.00010 |       0.00131 |       0.82992
     -0.00648 |       0.00000 |      9.21e-05 |       0.00161 |       0.83172
     -0.00701 |       0.00000 |      8.68e-05 |       0.00162 |       0.83013
     -0.00738 |       0.00000 |      8.08e-05 |       0.00155 |       0.83059
     -0.00783 |       0.00000 |      7.71e-05 |       0.00173 |       0.83150
     -0.00829 |       0.00000 |      7.32e-05 |       0.00183 |       0.83069
     -0.00850 |       0.00000 |      7.13e-05 |       0.00198 |       0.83059
Evaluating losses...
     -0.00918 |       0.00000 |      6.91e-05 |       0.00205 |     

     -0.00701 |       0.00000 |       0.00283 |       0.00254 |       0.81298
     -0.00726 |       0.00000 |       0.00280 |       0.00293 |       0.81316
Evaluating losses...
     -0.00764 |       0.00000 |       0.00274 |       0.00345 |       0.81332
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.24         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4778         |
| TimeElapsed     | 1.83e+04     |
| TimestepsSoFar  | 12324864     |
| ev_tdlam_before | 0.538        |
| loss_ent        | 0.8133237    |
| loss_kl         | 0.0034528887 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.007639244 |
| loss_vf_loss    | 0.002735255  |
----------------------------------
********** Iteration 3009 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00112 |       0.00000 |       0.00282 |       0.00074 |       0.82236
     -0.00302 |       0.00000 |       0.00205 

********** Iteration 3014 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00018 |       0.00000 |       0.00306 |       0.00082 |       0.81421
     -0.00256 |       0.00000 |       0.00182 |       0.00100 |       0.81630
     -0.00380 |       0.00000 |       0.00168 |       0.00130 |       0.81705
     -0.00413 |       0.00000 |       0.00164 |       0.00155 |       0.81718
     -0.00495 |       0.00000 |       0.00158 |       0.00151 |       0.81677
     -0.00488 |       0.00000 |       0.00154 |       0.00160 |       0.81751
     -0.00551 |       0.00000 |       0.00154 |       0.00196 |       0.81846
     -0.00531 |       0.00000 |       0.00150 |       0.00190 |       0.81799
     -0.00613 |       0.00000 |       0.00149 |       0.00200 |       0.81765
     -0.00624 |       0.00000 |       0.00145 |       0.00218 |       0.81835
Evaluating losses...
     -0.00699 |       0.00000 |       0.00142 |       0.00205 |     

     -0.00793 |       0.00000 |       0.00107 |       0.00328 |       0.80415
     -0.00806 |       0.00000 |       0.00107 |       0.00386 |       0.80267
     -0.00802 |       0.00000 |       0.00104 |       0.00323 |       0.80358
Evaluating losses...
     -0.00863 |       0.00000 |       0.00100 |       0.00328 |       0.80331
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.29         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4793         |
| TimeElapsed     | 1.83e+04     |
| TimestepsSoFar  | 12369920     |
| ev_tdlam_before | 0.509        |
| loss_ent        | 0.8033121    |
| loss_kl         | 0.003276307  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008632364 |
| loss_vf_loss    | 0.0010022535 |
----------------------------------
********** Iteration 3020 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00207 |       0.00000 |       0.00026 

********** Iteration 3025 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00201 |       0.00000 |       0.00123 |       0.00022 |       0.82502
     -0.00213 |       0.00000 |       0.00092 |       0.00049 |       0.82488
     -0.00365 |       0.00000 |       0.00093 |       0.00086 |       0.82472
     -0.00362 |       0.00000 |       0.00086 |       0.00099 |       0.82530
     -0.00482 |       0.00000 |       0.00081 |       0.00106 |       0.82375
     -0.00556 |       0.00000 |       0.00079 |       0.00125 |       0.82410
     -0.00555 |       0.00000 |       0.00075 |       0.00133 |       0.82317
     -0.00628 |       0.00000 |       0.00075 |       0.00145 |       0.82332
     -0.00657 |       0.00000 |       0.00073 |       0.00151 |       0.82304
     -0.00670 |       0.00000 |       0.00069 |       0.00163 |       0.82334
Evaluating losses...
     -0.00730 |       0.00000 |       0.00069 |       0.00167 |     

     -0.01023 |       0.00000 |      5.72e-05 |       0.00211 |       0.81105
     -0.01066 |       0.00000 |      5.57e-05 |       0.00227 |       0.80981
Evaluating losses...
     -0.01094 |       0.00000 |      5.43e-05 |       0.00283 |       0.81076
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.3          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4808         |
| TimeElapsed     | 1.84e+04     |
| TimestepsSoFar  | 12414976     |
| ev_tdlam_before | -1.03        |
| loss_ent        | 0.8107591    |
| loss_kl         | 0.0028344698 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010942544 |
| loss_vf_loss    | 5.425956e-05 |
----------------------------------
********** Iteration 3031 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00038 |       0.00000 |       0.00015 |       0.00054 |       0.80466
     -0.00247 |       0.00000 |      8.51e-05 

********** Iteration 3036 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00078 |       0.00000 |       0.00540 |       0.00092 |       0.76532
     -0.00355 |       0.00000 |       0.00477 |       0.00098 |       0.76616
     -0.00425 |       0.00000 |       0.00462 |       0.00094 |       0.76722
     -0.00520 |       0.00000 |       0.00447 |       0.00118 |       0.76663
     -0.00586 |       0.00000 |       0.00426 |       0.00139 |       0.76528
     -0.00611 |       0.00000 |       0.00414 |       0.00159 |       0.76449
     -0.00622 |       0.00000 |       0.00396 |       0.00175 |       0.76557
     -0.00731 |       0.00000 |       0.00383 |       0.00193 |       0.76503
     -0.00705 |       0.00000 |       0.00366 |       0.00195 |       0.76499
     -0.00750 |       0.00000 |       0.00353 |       0.00227 |       0.76523
Evaluating losses...
     -0.00837 |       0.00000 |       0.00340 |       0.00226 |     

     -0.00591 |       0.00000 |       0.00177 |       0.00129 |       0.78779
     -0.00589 |       0.00000 |       0.00174 |       0.00154 |       0.78718
Evaluating losses...
     -0.00624 |       0.00000 |       0.00172 |       0.00163 |       0.78859
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.37         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 4824         |
| TimeElapsed     | 1.84e+04     |
| TimestepsSoFar  | 12460032     |
| ev_tdlam_before | 0.432        |
| loss_ent        | 0.78859377   |
| loss_kl         | 0.0016259141 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.006244875 |
| loss_vf_loss    | 0.0017247408 |
----------------------------------
********** Iteration 3042 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00161 |       0.00000 |       0.00021 |       0.00078 |       0.81278
     -0.00525 |       0.00000 |       0.00015 

********** Iteration 3047 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00164 |       0.00000 |       0.00326 |       0.00060 |       0.79818
     -0.00305 |       0.00000 |       0.00211 |       0.00080 |       0.79639
     -0.00466 |       0.00000 |       0.00186 |       0.00111 |       0.79690
     -0.00586 |       0.00000 |       0.00179 |       0.00117 |       0.79610
     -0.00600 |       0.00000 |       0.00174 |       0.00141 |       0.79545
     -0.00675 |       0.00000 |       0.00171 |       0.00142 |       0.79670
     -0.00691 |       0.00000 |       0.00168 |       0.00149 |       0.79583
     -0.00718 |       0.00000 |       0.00166 |       0.00167 |       0.79623
     -0.00758 |       0.00000 |       0.00164 |       0.00164 |       0.79589
     -0.00782 |       0.00000 |       0.00165 |       0.00171 |       0.79675
Evaluating losses...
     -0.00833 |       0.00000 |       0.00177 |       0.00175 |     

     -0.00697 |       0.00000 |      3.37e-05 |       0.00178 |       0.76746
     -0.00737 |       0.00000 |      3.16e-05 |       0.00176 |       0.76754
     -0.00768 |       0.00000 |      3.03e-05 |       0.00180 |       0.76807
     -0.00781 |       0.00000 |      2.93e-05 |       0.00188 |       0.76794
Evaluating losses...
     -0.00833 |       0.00000 |      2.94e-05 |       0.00187 |       0.76736
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.31          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4838          |
| TimeElapsed     | 1.86e+04      |
| TimestepsSoFar  | 12505088      |
| ev_tdlam_before | -0.668        |
| loss_ent        | 0.76735795    |
| loss_kl         | 0.0018693598  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.008325148  |
| loss_vf_loss    | 2.9388992e-05 |
-----------------------------------
********** Iteration 3053 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3058 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00064 |       0.00000 |       0.00026 |       0.00073 |       0.82156
     -0.00413 |       0.00000 |       0.00012 |       0.00122 |       0.82168
     -0.00516 |       0.00000 |      9.44e-05 |       0.00107 |       0.82030
     -0.00614 |       0.00000 |      8.43e-05 |       0.00156 |       0.82068
     -0.00693 |       0.00000 |      7.90e-05 |       0.00150 |       0.82059
     -0.00748 |       0.00000 |      7.31e-05 |       0.00160 |       0.82116
     -0.00779 |       0.00000 |      6.91e-05 |       0.00173 |       0.82009
     -0.00825 |       0.00000 |      6.64e-05 |       0.00186 |       0.82131
     -0.00864 |       0.00000 |      6.43e-05 |       0.00196 |       0.82018
     -0.00906 |       0.00000 |      6.18e-05 |       0.00198 |       0.82141
Evaluating losses...
     -0.00998 |       0.00000 |      6.02e-05 |       0.00202 |     

     -0.00896 |       0.00000 |      9.00e-06 |       0.00195 |       0.81391
     -0.00967 |       0.00000 |      8.69e-06 |       0.00193 |       0.81211
     -0.01007 |       0.00000 |      8.28e-06 |       0.00206 |       0.81274
Evaluating losses...
     -0.01022 |       0.00000 |      8.17e-06 |       0.00249 |       0.81186
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.24         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4853         |
| TimeElapsed     | 1.87e+04     |
| TimestepsSoFar  | 12550144     |
| ev_tdlam_before | -1.48        |
| loss_ent        | 0.8118625    |
| loss_kl         | 0.002492903  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010221343 |
| loss_vf_loss    | 8.174745e-06 |
----------------------------------
********** Iteration 3064 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00185 |       0.00000 |       0.00138 

********** Iteration 3069 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00039 |       0.00000 |       0.00073 |       0.00076 |       0.80095
     -0.00300 |       0.00000 |       0.00058 |       0.00141 |       0.80192
     -0.00186 |       0.00000 |       0.00056 |       0.00216 |       0.80160
     -0.00440 |       0.00000 |       0.00055 |       0.00174 |       0.80185
     -0.00493 |       0.00000 |       0.00055 |       0.00233 |       0.80154
     -0.00422 |       0.00000 |       0.00054 |       0.00314 |       0.80265
     -0.00521 |       0.00000 |       0.00053 |       0.00207 |       0.80121
     -0.00536 |       0.00000 |       0.00053 |       0.00217 |       0.80169
     -0.00567 |       0.00000 |       0.00053 |       0.00196 |       0.80138
     -0.00583 |       0.00000 |       0.00053 |       0.00209 |       0.80074
Evaluating losses...
     -0.00627 |       0.00000 |       0.00051 |       0.00211 |     

     -0.00910 |       0.00000 |      1.40e-05 |       0.00171 |       0.82908
     -0.00921 |       0.00000 |      1.38e-05 |       0.00200 |       0.82842
     -0.00976 |       0.00000 |      1.33e-05 |       0.00203 |       0.82839
     -0.01036 |       0.00000 |      1.35e-05 |       0.00212 |       0.82779
Evaluating losses...
     -0.01082 |       0.00000 |      1.31e-05 |       0.00259 |       0.82647
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.14          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4868          |
| TimeElapsed     | 1.87e+04      |
| TimestepsSoFar  | 12595200      |
| ev_tdlam_before | -0.17         |
| loss_ent        | 0.8264659     |
| loss_kl         | 0.0025909129  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.010822831  |
| loss_vf_loss    | 1.3088601e-05 |
-----------------------------------
********** Iteration 3075 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3080 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00021 |       0.00000 |       0.00136 |       0.00174 |       0.80313
     -0.00190 |       0.00000 |       0.00125 |       0.00220 |       0.80490
     -0.00341 |       0.00000 |       0.00113 |       0.00243 |       0.80389
     -0.00345 |       0.00000 |       0.00108 |       0.00303 |       0.80522
     -0.00368 |       0.00000 |       0.00107 |       0.00198 |       0.80399
     -0.00397 |       0.00000 |       0.00105 |       0.00266 |       0.80534
     -0.00423 |       0.00000 |       0.00103 |       0.00273 |       0.80613
     -0.00424 |       0.00000 |       0.00101 |       0.00282 |       0.80597
     -0.00425 |       0.00000 |       0.00100 |       0.00231 |       0.80500
     -0.00429 |       0.00000 |       0.00099 |       0.00259 |       0.80383
Evaluating losses...
     -0.00468 |       0.00000 |       0.00096 |       0.00331 |     

     -0.00646 |       0.00000 |       0.00065 |       0.00149 |       0.77887
     -0.00658 |       0.00000 |       0.00064 |       0.00161 |       0.77772
     -0.00684 |       0.00000 |       0.00063 |       0.00186 |       0.77829
Evaluating losses...
     -0.00766 |       0.00000 |       0.00061 |       0.00192 |       0.77820
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.1          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4883         |
| TimeElapsed     | 1.87e+04     |
| TimestepsSoFar  | 12640256     |
| ev_tdlam_before | 0.496        |
| loss_ent        | 0.77819884   |
| loss_kl         | 0.0019171749 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.007657762 |
| loss_vf_loss    | 0.0006107305 |
----------------------------------
********** Iteration 3086 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00104 |       0.00000 |       0.00020 

********** Iteration 3091 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00076 |       0.00000 |       0.00180 |       0.00044 |       0.81429
     -0.00330 |       0.00000 |       0.00169 |       0.00070 |       0.81414
     -0.00432 |       0.00000 |       0.00164 |       0.00088 |       0.81454
     -0.00498 |       0.00000 |       0.00160 |       0.00116 |       0.81442
     -0.00570 |       0.00000 |       0.00159 |       0.00130 |       0.81508
     -0.00611 |       0.00000 |       0.00156 |       0.00140 |       0.81657
     -0.00650 |       0.00000 |       0.00155 |       0.00157 |       0.81569
     -0.00671 |       0.00000 |       0.00153 |       0.00155 |       0.81629
     -0.00702 |       0.00000 |       0.00153 |       0.00164 |       0.81701
     -0.00692 |       0.00000 |       0.00152 |       0.00180 |       0.81636
Evaluating losses...
     -0.00750 |       0.00000 |       0.00151 |       0.00187 |     

     -0.00475 |       0.00000 |       0.00068 |       0.00112 |       0.84447
     -0.00430 |       0.00000 |       0.00067 |       0.00109 |       0.84520
     -0.00483 |       0.00000 |       0.00064 |       0.00121 |       0.84367
Evaluating losses...
     -0.00526 |       0.00000 |       0.00062 |       0.00126 |       0.84501
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.08         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4898         |
| TimeElapsed     | 1.88e+04     |
| TimestepsSoFar  | 12685312     |
| ev_tdlam_before | 0.0665       |
| loss_ent        | 0.84500897   |
| loss_kl         | 0.0012634677 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.005261046 |
| loss_vf_loss    | 0.0006158443 |
----------------------------------
********** Iteration 3097 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00022 |       0.00000 |       0.00170 

********** Iteration 3102 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00153 |       0.00000 |       0.00161 |       0.00028 |       0.79569
     -0.00122 |       0.00000 |       0.00105 |       0.00054 |       0.79400
     -0.00107 |       0.00000 |       0.00080 |       0.00134 |       0.79464
     -0.00283 |       0.00000 |       0.00065 |       0.00092 |       0.79554
     -0.00331 |       0.00000 |       0.00057 |       0.00093 |       0.79552
     -0.00354 |       0.00000 |       0.00052 |       0.00106 |       0.79566
     -0.00383 |       0.00000 |       0.00049 |       0.00102 |       0.79632
     -0.00387 |       0.00000 |       0.00046 |       0.00099 |       0.79633
     -0.00404 |       0.00000 |       0.00044 |       0.00106 |       0.79673
     -0.00412 |       0.00000 |       0.00042 |       0.00112 |       0.79716
Evaluating losses...
     -0.00441 |       0.00000 |       0.00041 |       0.00119 |     

     -0.00864 |       0.00000 |      5.06e-05 |       0.00165 |       0.79537
     -0.00890 |       0.00000 |      4.95e-05 |       0.00177 |       0.79475
     -0.00900 |       0.00000 |      4.85e-05 |       0.00196 |       0.79455
Evaluating losses...
     -0.00982 |       0.00000 |      4.71e-05 |       0.00222 |       0.79311
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.11         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4913         |
| TimeElapsed     | 1.88e+04     |
| TimestepsSoFar  | 12730368     |
| ev_tdlam_before | -0.45        |
| loss_ent        | 0.7931101    |
| loss_kl         | 0.002219398  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.009824288 |
| loss_vf_loss    | 4.710876e-05 |
----------------------------------
********** Iteration 3108 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00055 |       0.00000 |       0.00201 

********** Iteration 3113 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00129 |       0.00000 |       0.00130 |       0.00046 |       0.78733
     -0.00213 |       0.00000 |       0.00106 |       0.00113 |       0.78717
     -0.00406 |       0.00000 |       0.00097 |       0.00126 |       0.78849
     -0.00508 |       0.00000 |       0.00093 |       0.00134 |       0.78799
     -0.00539 |       0.00000 |       0.00089 |       0.00151 |       0.78681
     -0.00610 |       0.00000 |       0.00086 |       0.00144 |       0.78857
     -0.00636 |       0.00000 |       0.00085 |       0.00141 |       0.78818
     -0.00659 |       0.00000 |       0.00081 |       0.00159 |       0.78755
     -0.00635 |       0.00000 |       0.00079 |       0.00195 |       0.78893
     -0.00689 |       0.00000 |       0.00077 |       0.00209 |       0.78801
Evaluating losses...
     -0.00747 |       0.00000 |       0.00076 |       0.00197 |     

     -0.00552 |       0.00000 |       0.00135 |       0.00208 |       0.78676
     -0.00466 |       0.00000 |       0.00123 |       0.00252 |       0.78506
     -0.00590 |       0.00000 |       0.00112 |       0.00232 |       0.78726
Evaluating losses...
     -0.00646 |       0.00000 |       0.00106 |       0.00252 |       0.78682
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.08         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4928         |
| TimeElapsed     | 1.9e+04      |
| TimestepsSoFar  | 12775424     |
| ev_tdlam_before | 0.509        |
| loss_ent        | 0.78681993   |
| loss_kl         | 0.0025192671 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.006456789 |
| loss_vf_loss    | 0.0010554838 |
----------------------------------
********** Iteration 3119 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00270 |       0.00000 |       0.00030 

********** Iteration 3124 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00053 |       0.00000 |       0.00246 |       0.00036 |       0.76466
     -0.00188 |       0.00000 |       0.00117 |       0.00107 |       0.76444
     -0.00356 |       0.00000 |       0.00099 |       0.00072 |       0.76488
     -0.00391 |       0.00000 |       0.00088 |       0.00094 |       0.76401
     -0.00430 |       0.00000 |       0.00082 |       0.00118 |       0.76392
     -0.00489 |       0.00000 |       0.00079 |       0.00109 |       0.76378
     -0.00511 |       0.00000 |       0.00076 |       0.00125 |       0.76423
     -0.00582 |       0.00000 |       0.00072 |       0.00139 |       0.76324
     -0.00557 |       0.00000 |       0.00071 |       0.00148 |       0.76409
     -0.00599 |       0.00000 |       0.00069 |       0.00146 |       0.76520
Evaluating losses...
     -0.00668 |       0.00000 |       0.00066 |       0.00144 |     

     -0.00602 |       0.00000 |       0.00038 |       0.00118 |       0.80665
     -0.00608 |       0.00000 |       0.00038 |       0.00120 |       0.80630
     -0.00613 |       0.00000 |       0.00036 |       0.00144 |       0.80741
Evaluating losses...
     -0.00663 |       0.00000 |       0.00034 |       0.00148 |       0.80665
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.09          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4943          |
| TimeElapsed     | 1.91e+04      |
| TimestepsSoFar  | 12820480      |
| ev_tdlam_before | 0.326         |
| loss_ent        | 0.8066526     |
| loss_kl         | 0.0014770218  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.006633403  |
| loss_vf_loss    | 0.00034070172 |
-----------------------------------
********** Iteration 3130 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00124 |       0.00000 | 

********** Iteration 3135 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00074 |       0.00000 |       0.00476 |       0.00064 |       0.78348
     -0.00429 |       0.00000 |       0.00348 |       0.00085 |       0.78509
     -0.00519 |       0.00000 |       0.00301 |       0.00117 |       0.78478
     -0.00622 |       0.00000 |       0.00277 |       0.00138 |       0.78460
     -0.00664 |       0.00000 |       0.00261 |       0.00145 |       0.78411
     -0.00700 |       0.00000 |       0.00246 |       0.00151 |       0.78493
     -0.00723 |       0.00000 |       0.00237 |       0.00166 |       0.78388
     -0.00761 |       0.00000 |       0.00230 |       0.00170 |       0.78454
     -0.00780 |       0.00000 |       0.00229 |       0.00176 |       0.78431
     -0.00797 |       0.00000 |       0.00222 |       0.00180 |       0.78515
Evaluating losses...
     -0.00840 |       0.00000 |       0.00211 |       0.00195 |     

     -0.00523 |       0.00000 |       0.00140 |       0.00158 |       0.77817
     -0.00529 |       0.00000 |       0.00140 |       0.00149 |       0.77737
Evaluating losses...
     -0.00566 |       0.00000 |       0.00138 |       0.00138 |       0.77674
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.09          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4958          |
| TimeElapsed     | 1.91e+04      |
| TimestepsSoFar  | 12865536      |
| ev_tdlam_before | 0.505         |
| loss_ent        | 0.77673817    |
| loss_kl         | 0.001380317   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0056622727 |
| loss_vf_loss    | 0.0013752213  |
-----------------------------------
********** Iteration 3141 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00050 |       0.00000 |      4.55e-05 |       0.00090 |       0.79046
     -0.00442 |       0.00000 | 

********** Iteration 3146 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |       0.00000 |       0.00109 |       0.00032 |       0.82245
     -0.00318 |       0.00000 |       0.00081 |       0.00069 |       0.82197
     -0.00428 |       0.00000 |       0.00074 |       0.00076 |       0.82086
     -0.00510 |       0.00000 |       0.00073 |       0.00091 |       0.82140
     -0.00531 |       0.00000 |       0.00072 |       0.00100 |       0.82245
     -0.00548 |       0.00000 |       0.00071 |       0.00111 |       0.82223
     -0.00560 |       0.00000 |       0.00069 |       0.00125 |       0.82293
     -0.00604 |       0.00000 |       0.00069 |       0.00128 |       0.82274
     -0.00610 |       0.00000 |       0.00069 |       0.00138 |       0.82340
     -0.00630 |       0.00000 |       0.00068 |       0.00143 |       0.82347
Evaluating losses...
     -0.00661 |       0.00000 |       0.00065 |       0.00139 |     

     -0.00626 |       0.00000 |       0.00048 |       0.00145 |       0.79570
     -0.00668 |       0.00000 |       0.00046 |       0.00160 |       0.79609
     -0.00673 |       0.00000 |       0.00043 |       0.00176 |       0.79554
Evaluating losses...
     -0.00763 |       0.00000 |       0.00041 |       0.00192 |       0.79612
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.1          |
| EpThisIter      | 1            |
| EpisodesSoFar   | 4973         |
| TimeElapsed     | 1.92e+04     |
| TimestepsSoFar  | 12910592     |
| ev_tdlam_before | -0.0347      |
| loss_ent        | 0.7961198    |
| loss_kl         | 0.0019154703 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.007625135 |
| loss_vf_loss    | 0.0004143415 |
----------------------------------
********** Iteration 3152 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00083 |       0.00000 |       0.00348 

********** Iteration 3157 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00055 |       0.00000 |       0.00254 |       0.00076 |       0.76702
     -0.00413 |       0.00000 |       0.00185 |       0.00118 |       0.76778
     -0.00506 |       0.00000 |       0.00165 |       0.00106 |       0.76609
     -0.00540 |       0.00000 |       0.00150 |       0.00132 |       0.76514
     -0.00606 |       0.00000 |       0.00139 |       0.00156 |       0.76657
     -0.00631 |       0.00000 |       0.00132 |       0.00155 |       0.76645
     -0.00616 |       0.00000 |       0.00125 |       0.00182 |       0.76647
     -0.00662 |       0.00000 |       0.00120 |       0.00174 |       0.76685
     -0.00690 |       0.00000 |       0.00115 |       0.00196 |       0.76589
     -0.00702 |       0.00000 |       0.00112 |       0.00187 |       0.76691
Evaluating losses...
     -0.00753 |       0.00000 |       0.00106 |       0.00189 |     

     -0.00668 |       0.00000 |      5.74e-05 |       0.00152 |       0.80008
     -0.00711 |       0.00000 |      5.64e-05 |       0.00162 |       0.79825
     -0.00726 |       0.00000 |      5.52e-05 |       0.00174 |       0.79874
Evaluating losses...
     -0.00786 |       0.00000 |      5.39e-05 |       0.00174 |       0.79863
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.15          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 4988          |
| TimeElapsed     | 1.92e+04      |
| TimestepsSoFar  | 12955648      |
| ev_tdlam_before | -3.4          |
| loss_ent        | 0.79862934    |
| loss_kl         | 0.0017419239  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.007863783  |
| loss_vf_loss    | 5.3925894e-05 |
-----------------------------------
********** Iteration 3163 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00163 |       0.00000 | 

********** Iteration 3168 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00017 |       0.00000 |       0.00080 |       0.00046 |       0.77772
     -0.00267 |       0.00000 |       0.00043 |       0.00078 |       0.77741
     -0.00480 |       0.00000 |       0.00034 |       0.00098 |       0.77783
     -0.00581 |       0.00000 |       0.00029 |       0.00130 |       0.77787
     -0.00603 |       0.00000 |       0.00026 |       0.00137 |       0.77905
     -0.00679 |       0.00000 |       0.00024 |       0.00160 |       0.77826
     -0.00716 |       0.00000 |       0.00022 |       0.00153 |       0.77788
     -0.00774 |       0.00000 |       0.00021 |       0.00170 |       0.77766
     -0.00804 |       0.00000 |       0.00020 |       0.00178 |       0.77773
     -0.00825 |       0.00000 |       0.00019 |       0.00200 |       0.77782
Evaluating losses...
     -0.00855 |       0.00000 |       0.00019 |       0.00215 |     

     -0.00798 |       0.00000 |       0.00055 |       0.00165 |       0.82008
     -0.00871 |       0.00000 |       0.00052 |       0.00163 |       0.82028
     -0.00869 |       0.00000 |       0.00050 |       0.00177 |       0.81966
     -0.00870 |       0.00000 |       0.00049 |       0.00187 |       0.82062
Evaluating losses...
     -0.00953 |       0.00000 |       0.00047 |       0.00208 |       0.81988
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.15          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5003          |
| TimeElapsed     | 1.94e+04      |
| TimestepsSoFar  | 13000704      |
| ev_tdlam_before | -0.0915       |
| loss_ent        | 0.8198753     |
| loss_kl         | 0.0020840433  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.009525342  |
| loss_vf_loss    | 0.00047193913 |
-----------------------------------
********** Iteration 3174 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3179 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00147 |       0.00000 |       0.00157 |       0.00035 |       0.79757
     -0.00185 |       0.00000 |       0.00121 |       0.00054 |       0.79696
     -0.00360 |       0.00000 |       0.00102 |       0.00075 |       0.79671
     -0.00474 |       0.00000 |       0.00084 |       0.00075 |       0.79686
     -0.00473 |       0.00000 |       0.00071 |       0.00100 |       0.79664
     -0.00478 |       0.00000 |       0.00062 |       0.00100 |       0.79662
     -0.00530 |       0.00000 |       0.00057 |       0.00103 |       0.79670
     -0.00563 |       0.00000 |       0.00053 |       0.00112 |       0.79712
     -0.00581 |       0.00000 |       0.00049 |       0.00116 |       0.79637
     -0.00560 |       0.00000 |       0.00047 |       0.00129 |       0.79599
Evaluating losses...
     -0.00608 |       0.00000 |       0.00045 |       0.00123 |     

     -0.00879 |       0.00000 |      3.04e-05 |       0.00183 |       0.78938
     -0.00818 |       0.00000 |      2.95e-05 |       0.00203 |       0.78973
     -0.00962 |       0.00000 |      2.81e-05 |       0.00192 |       0.78961
Evaluating losses...
     -0.01019 |       0.00000 |      2.75e-05 |       0.00203 |       0.78901
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.1           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5018          |
| TimeElapsed     | 1.95e+04      |
| TimestepsSoFar  | 13045760      |
| ev_tdlam_before | -0.496        |
| loss_ent        | 0.78900534    |
| loss_kl         | 0.0020308138  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.010188183  |
| loss_vf_loss    | 2.7539592e-05 |
-----------------------------------
********** Iteration 3185 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00103 |       0.00000 | 

********** Iteration 3190 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     1.90e-05 |       0.00000 |       0.00463 |       0.00160 |       0.79124
     -0.00414 |       0.00000 |       0.00360 |       0.00258 |       0.79585
     -0.00535 |       0.00000 |       0.00331 |       0.00240 |       0.79491
     -0.00655 |       0.00000 |       0.00314 |       0.00214 |       0.79509
     -0.00662 |       0.00000 |       0.00301 |       0.00257 |       0.79563
     -0.00684 |       0.00000 |       0.00298 |       0.00243 |       0.79421
     -0.00760 |       0.00000 |       0.00287 |       0.00218 |       0.79265
     -0.00750 |       0.00000 |       0.00280 |       0.00233 |       0.79310
     -0.00785 |       0.00000 |       0.00274 |       0.00261 |       0.79529
     -0.00835 |       0.00000 |       0.00271 |       0.00225 |       0.79283
Evaluating losses...
     -0.00881 |       0.00000 |       0.00265 |       0.00245 |     

     -0.00966 |       0.00000 |      5.33e-05 |       0.00178 |       0.76893
     -0.01012 |       0.00000 |      5.01e-05 |       0.00183 |       0.76906
Evaluating losses...
     -0.01111 |       0.00000 |      4.78e-05 |       0.00202 |       0.76825
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.12          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5033          |
| TimeElapsed     | 1.95e+04      |
| TimestepsSoFar  | 13090816      |
| ev_tdlam_before | -0.215        |
| loss_ent        | 0.76824844    |
| loss_kl         | 0.0020220038  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.011109188  |
| loss_vf_loss    | 4.7770212e-05 |
-----------------------------------
********** Iteration 3196 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00015 |       0.00000 |       0.00087 |       0.00063 |       0.74920
     -0.00233 |       0.00000 | 

********** Iteration 3201 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00896 |       0.00000 |       0.00174 |       0.00430 |       0.74999
     -0.00129 |       0.00000 |       0.00165 |       0.00455 |       0.74922
     -0.00237 |       0.00000 |       0.00162 |       0.00202 |       0.74763
     -0.00363 |       0.00000 |       0.00161 |       0.00180 |       0.74786
     -0.00424 |       0.00000 |       0.00160 |       0.00205 |       0.74847
     -0.00453 |       0.00000 |       0.00159 |       0.00237 |       0.74950
     -0.00505 |       0.00000 |       0.00157 |       0.00243 |       0.74892
     -0.00544 |       0.00000 |       0.00158 |       0.00272 |       0.74874
     -0.00568 |       0.00000 |       0.00157 |       0.00239 |       0.74959
     -0.00579 |       0.00000 |       0.00155 |       0.00256 |       0.75067
Evaluating losses...
     -0.00613 |       0.00000 |       0.00151 |       0.00267 |     

     -0.00709 |       0.00000 |       0.00510 |       0.00278 |       0.79616
     -0.00730 |       0.00000 |       0.00502 |       0.00271 |       0.79546
     -0.00747 |       0.00000 |       0.00495 |       0.00292 |       0.79598
Evaluating losses...
     -0.00826 |       0.00000 |       0.00486 |       0.00285 |       0.79524
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.08         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 5048         |
| TimeElapsed     | 1.96e+04     |
| TimestepsSoFar  | 13135872     |
| ev_tdlam_before | 0.515        |
| loss_ent        | 0.7952378    |
| loss_kl         | 0.0028488124 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008260174 |
| loss_vf_loss    | 0.004858939  |
----------------------------------
********** Iteration 3207 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 |       0.00246 

********** Iteration 3212 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00150 |       0.00000 |      6.38e-05 |       0.00041 |       0.81480
     -0.00255 |       0.00000 |      4.29e-05 |       0.00065 |       0.81362
     -0.00354 |       0.00000 |      3.68e-05 |       0.00072 |       0.81302
     -0.00465 |       0.00000 |      3.23e-05 |       0.00083 |       0.81336
     -0.00506 |       0.00000 |      3.03e-05 |       0.00098 |       0.81320
     -0.00573 |       0.00000 |      2.84e-05 |       0.00102 |       0.81373
     -0.00580 |       0.00000 |      2.70e-05 |       0.00115 |       0.81390
     -0.00639 |       0.00000 |      2.60e-05 |       0.00126 |       0.81479
     -0.00665 |       0.00000 |      2.48e-05 |       0.00130 |       0.81451
     -0.00701 |       0.00000 |      2.37e-05 |       0.00139 |       0.81453
Evaluating losses...
     -0.00753 |       0.00000 |      2.34e-05 |       0.00144 |     

     -0.00941 |       0.00000 |      8.37e-05 |       0.00185 |       0.79395
     -0.00965 |       0.00000 |      7.96e-05 |       0.00199 |       0.79457
     -0.00975 |       0.00000 |      7.62e-05 |       0.00219 |       0.79391
Evaluating losses...
     -0.01050 |       0.00000 |      7.38e-05 |       0.00207 |       0.79498
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.14         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 5063         |
| TimeElapsed     | 1.96e+04     |
| TimestepsSoFar  | 13180928     |
| ev_tdlam_before | -1.21        |
| loss_ent        | 0.7949834    |
| loss_kl         | 0.002072024  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.010498037 |
| loss_vf_loss    | 7.384613e-05 |
----------------------------------
********** Iteration 3218 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00024 |       0.00000 |       0.00279 

********** Iteration 3223 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00057 |       0.00000 |       0.00018 |       0.00054 |       0.80114
     -0.00337 |       0.00000 |       0.00012 |       0.00071 |       0.79969
     -0.00468 |       0.00000 |      9.48e-05 |       0.00095 |       0.79879
     -0.00612 |       0.00000 |      8.29e-05 |       0.00115 |       0.79877
     -0.00626 |       0.00000 |      7.53e-05 |       0.00136 |       0.79844
     -0.00720 |       0.00000 |      6.96e-05 |       0.00148 |       0.79889
     -0.00786 |       0.00000 |      6.60e-05 |       0.00154 |       0.79820
     -0.00815 |       0.00000 |      6.19e-05 |       0.00169 |       0.79797
     -0.00813 |       0.00000 |      5.96e-05 |       0.00187 |       0.79675
     -0.00873 |       0.00000 |      5.78e-05 |       0.00194 |       0.79682
Evaluating losses...
     -0.00930 |       0.00000 |      5.45e-05 |       0.00196 |     

     -0.00746 |       0.00000 |      3.06e-05 |       0.00164 |       0.81825
     -0.00749 |       0.00000 |      2.99e-05 |       0.00178 |       0.81807
     -0.00777 |       0.00000 |      2.86e-05 |       0.00184 |       0.81756
Evaluating losses...
     -0.00860 |       0.00000 |      2.80e-05 |       0.00181 |       0.81808
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.14         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 5078         |
| TimeElapsed     | 1.97e+04     |
| TimestepsSoFar  | 13225984     |
| ev_tdlam_before | -0.922       |
| loss_ent        | 0.8180836    |
| loss_kl         | 0.0018069594 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008600167 |
| loss_vf_loss    | 2.796062e-05 |
----------------------------------
********** Iteration 3229 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -6.89e-06 |       0.00000 |       0.00315 

********** Iteration 3234 ************
Eval num_timesteps=13246464, episode_reward=0.39 +/- 0.89
Episode length: 3000.00 +/- 0.00
New best mean reward!
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -7.13e-06 |       0.00000 |       0.00013 |       0.00074 |       0.80912
     -0.00406 |       0.00000 |      8.73e-05 |       0.00113 |       0.80866
     -0.00562 |       0.00000 |      7.43e-05 |       0.00147 |       0.80940
     -0.00679 |       0.00000 |      6.55e-05 |       0.00126 |       0.80816
     -0.00748 |       0.00000 |      6.05e-05 |       0.00129 |       0.81020
     -0.00778 |       0.00000 |      5.66e-05 |       0.00147 |       0.80883
     -0.00831 |       0.00000 |      5.36e-05 |       0.00172 |       0.80906
     -0.00861 |       0.00000 |      5.14e-05 |       0.00175 |       0.80933
     -0.00914 |       0.00000 |      4.93e-05 |       0.00184 |       0.80788
     -0.00944 |       0.00000 |      4.73e-05 |       

     -0.00638 |       0.00000 |       0.00217 |       0.00137 |       0.81043
     -0.00637 |       0.00000 |       0.00212 |       0.00151 |       0.81042
     -0.00645 |       0.00000 |       0.00206 |       0.00153 |       0.81139
     -0.00667 |       0.00000 |       0.00201 |       0.00182 |       0.81076
     -0.00701 |       0.00000 |       0.00196 |       0.00178 |       0.81135
Evaluating losses...
     -0.00740 |       0.00000 |       0.00188 |       0.00181 |       0.81130
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.12          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5093          |
| TimeElapsed     | 1.99e+04      |
| TimestepsSoFar  | 13271040      |
| ev_tdlam_before | 0.296         |
| loss_ent        | 0.811305      |
| loss_kl         | 0.0018137529  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0074026138 |
| loss_vf_loss    | 0.0018837365  |
-----------------------------------
*******

********** Iteration 3245 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00137 |       0.00000 |       0.00068 |       0.00041 |       0.79351
      0.00046 |       0.00000 |       0.00050 |       0.00109 |       0.79231
     -0.00304 |       0.00000 |       0.00048 |       0.00094 |       0.79316
     -0.00229 |       0.00000 |       0.00047 |       0.00112 |       0.79447
     -0.00348 |       0.00000 |       0.00046 |       0.00131 |       0.79444
     -0.00441 |       0.00000 |       0.00045 |       0.00134 |       0.79296
     -0.00447 |       0.00000 |       0.00045 |       0.00134 |       0.79381
     -0.00432 |       0.00000 |       0.00044 |       0.00143 |       0.79415
     -0.00427 |       0.00000 |       0.00044 |       0.00148 |       0.79544
     -0.00411 |       0.00000 |       0.00043 |       0.00162 |       0.79510
Evaluating losses...
     -0.00516 |       0.00000 |       0.00042 |       0.00169 |     

     -0.00481 |       0.00000 |       0.00112 |       0.00096 |       0.77939
     -0.00461 |       0.00000 |       0.00106 |       0.00126 |       0.77956
     -0.00471 |       0.00000 |       0.00102 |       0.00142 |       0.77912
Evaluating losses...
     -0.00574 |       0.00000 |       0.00096 |       0.00115 |       0.77919
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.11         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 5108         |
| TimeElapsed     | 1.99e+04     |
| TimestepsSoFar  | 13316096     |
| ev_tdlam_before | 0.728        |
| loss_ent        | 0.7791949    |
| loss_kl         | 0.001145039  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.005742614 |
| loss_vf_loss    | 0.0009579016 |
----------------------------------
********** Iteration 3251 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00085 |       0.00000 |       0.00302 

********** Iteration 3256 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00081 |       0.00000 |      1.70e-05 |       0.00060 |       0.80488
     -0.00394 |       0.00000 |      1.43e-05 |       0.00091 |       0.80541
     -0.00547 |       0.00000 |      1.34e-05 |       0.00106 |       0.80694
     -0.00637 |       0.00000 |      1.30e-05 |       0.00123 |       0.80735
     -0.00693 |       0.00000 |      1.23e-05 |       0.00135 |       0.80720
     -0.00781 |       0.00000 |      1.18e-05 |       0.00151 |       0.80773
     -0.00877 |       0.00000 |      1.19e-05 |       0.00158 |       0.80787
     -0.00865 |       0.00000 |      1.15e-05 |       0.00165 |       0.80670
     -0.00946 |       0.00000 |      1.10e-05 |       0.00160 |       0.80777
     -0.00988 |       0.00000 |      1.05e-05 |       0.00166 |       0.80765
Evaluating losses...
     -0.01078 |       0.00000 |      1.01e-05 |       0.00166 |     

     -0.00470 |       0.00000 |       0.00094 |       0.00194 |       0.80740
     -0.00496 |       0.00000 |       0.00088 |       0.00213 |       0.80672
     -0.00510 |       0.00000 |       0.00084 |       0.00228 |       0.80737
Evaluating losses...
     -0.00505 |       0.00000 |       0.00077 |       0.00269 |       0.80826
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.12          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5123          |
| TimeElapsed     | 1.99e+04      |
| TimestepsSoFar  | 13361152      |
| ev_tdlam_before | 0.0298        |
| loss_ent        | 0.80825555    |
| loss_kl         | 0.002689833   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0050481483 |
| loss_vf_loss    | 0.0007743145  |
-----------------------------------
********** Iteration 3262 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00065 |       0.00000 | 

********** Iteration 3267 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00094 |       0.00000 |       0.00323 |       0.00046 |       0.77215
     -0.00248 |       0.00000 |       0.00273 |       0.00080 |       0.77140
     -0.00347 |       0.00000 |       0.00254 |       0.00109 |       0.77368
     -0.00376 |       0.00000 |       0.00245 |       0.00104 |       0.77242
     -0.00406 |       0.00000 |       0.00242 |       0.00111 |       0.77369
     -0.00462 |       0.00000 |       0.00236 |       0.00116 |       0.77366
     -0.00438 |       0.00000 |       0.00234 |       0.00124 |       0.77268
     -0.00490 |       0.00000 |       0.00232 |       0.00117 |       0.77366
     -0.00545 |       0.00000 |       0.00230 |       0.00127 |       0.77415
     -0.00547 |       0.00000 |       0.00228 |       0.00137 |       0.77352
Evaluating losses...
     -0.00599 |       0.00000 |       0.00224 |       0.00136 |     

     -0.00791 |       0.00000 |       0.00064 |       0.00165 |       0.82896
     -0.00848 |       0.00000 |       0.00063 |       0.00165 |       0.82806
Evaluating losses...
     -0.00922 |       0.00000 |       0.00062 |       0.00157 |       0.82930
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.23          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5138          |
| TimeElapsed     | 2e+04         |
| TimestepsSoFar  | 13406208      |
| ev_tdlam_before | -0.895        |
| loss_ent        | 0.82930326    |
| loss_kl         | 0.0015681155  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.009217118  |
| loss_vf_loss    | 0.00062073226 |
-----------------------------------
********** Iteration 3273 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00026 |       0.00000 |       0.00329 |       0.00060 |       0.81675
     -0.00294 |       0.00000 | 

********** Iteration 3278 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00176 |       0.00000 |       0.00013 |       0.00078 |       0.78028
     -0.00398 |       0.00000 |       0.00011 |       0.00105 |       0.78154
     -0.00567 |       0.00000 |      9.41e-05 |       0.00116 |       0.78189
     -0.00663 |       0.00000 |      8.54e-05 |       0.00129 |       0.78255
     -0.00772 |       0.00000 |      7.93e-05 |       0.00142 |       0.78250
     -0.00810 |       0.00000 |      7.44e-05 |       0.00164 |       0.78346
     -0.00888 |       0.00000 |      6.95e-05 |       0.00165 |       0.78265
     -0.00897 |       0.00000 |      6.59e-05 |       0.00174 |       0.78307
     -0.00947 |       0.00000 |      6.31e-05 |       0.00196 |       0.78334
     -0.00984 |       0.00000 |      5.95e-05 |       0.00204 |       0.78436
Evaluating losses...
     -0.01097 |       0.00000 |      5.89e-05 |       0.00197 |     

     -0.00813 |       0.00000 |      6.65e-05 |       0.00150 |       0.79646
     -0.00795 |       0.00000 |      6.38e-05 |       0.00186 |       0.79780
     -0.00903 |       0.00000 |      6.18e-05 |       0.00169 |       0.79687
Evaluating losses...
     -0.00958 |       0.00000 |      6.04e-05 |       0.00178 |       0.79654
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.25          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5153          |
| TimeElapsed     | 2e+04         |
| TimestepsSoFar  | 13451264      |
| ev_tdlam_before | 0.0113        |
| loss_ent        | 0.7965424     |
| loss_kl         | 0.0017761338  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.009581741  |
| loss_vf_loss    | 6.0427123e-05 |
-----------------------------------
********** Iteration 3284 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00024 |       0.00000 | 

********** Iteration 3289 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00096 |       0.00000 |       0.00223 |       0.00039 |       0.77954
     -0.00198 |       0.00000 |       0.00189 |       0.00080 |       0.77883
     -0.00305 |       0.00000 |       0.00154 |       0.00083 |       0.77831
     -0.00390 |       0.00000 |       0.00116 |       0.00081 |       0.77790
     -0.00399 |       0.00000 |       0.00077 |       0.00094 |       0.77890
     -0.00441 |       0.00000 |       0.00055 |       0.00090 |       0.77846
     -0.00461 |       0.00000 |       0.00045 |       0.00097 |       0.77892
     -0.00474 |       0.00000 |       0.00040 |       0.00100 |       0.77900
     -0.00483 |       0.00000 |       0.00038 |       0.00102 |       0.77979
     -0.00487 |       0.00000 |       0.00035 |       0.00103 |       0.77976
Evaluating losses...
     -0.00509 |       0.00000 |       0.00032 |       0.00112 |     

     -0.00357 |       0.00000 |       0.00146 |       0.00104 |       0.77500
     -0.00375 |       0.00000 |       0.00144 |       0.00099 |       0.77452
     -0.00395 |       0.00000 |       0.00140 |       0.00110 |       0.77509
     -0.00408 |       0.00000 |       0.00137 |       0.00124 |       0.77513
Evaluating losses...
     -0.00448 |       0.00000 |       0.00135 |       0.00130 |       0.77494
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.26         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 5168         |
| TimeElapsed     | 2e+04        |
| TimestepsSoFar  | 13496320     |
| ev_tdlam_before | 0.308        |
| loss_ent        | 0.7749367    |
| loss_kl         | 0.0012959485 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.004475386 |
| loss_vf_loss    | 0.0013459799 |
----------------------------------
********** Iteration 3295 ************
Eval num_timesteps=13496320, episode_reward=0.18 +/- 0.90
Ep

********** Iteration 3300 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00306 |       0.00000 |       0.00071 |       0.00027 |       0.79834
     7.81e-05 |       0.00000 |       0.00038 |       0.00064 |       0.79879
     -0.00055 |       0.00000 |       0.00034 |       0.00050 |       0.79748
     -0.00052 |       0.00000 |       0.00031 |       0.00060 |       0.79731
     -0.00157 |       0.00000 |       0.00030 |       0.00073 |       0.79713
     -0.00236 |       0.00000 |       0.00029 |       0.00093 |       0.79715
     -0.00224 |       0.00000 |       0.00029 |       0.00081 |       0.79629
     -0.00267 |       0.00000 |       0.00028 |       0.00088 |       0.79584
     -0.00202 |       0.00000 |       0.00027 |       0.00093 |       0.79593
     -0.00301 |       0.00000 |       0.00026 |       0.00102 |       0.79646
Evaluating losses...
     -0.00403 |       0.00000 |       0.00027 |       0.00097 |     

     -0.00430 |       0.00000 |       0.00030 |       0.00107 |       0.80448
     -0.00493 |       0.00000 |       0.00031 |       0.00117 |       0.80448
     -0.00399 |       0.00000 |       0.00030 |       0.00142 |       0.80453
Evaluating losses...
     -0.00457 |       0.00000 |       0.00028 |       0.00296 |       0.80283
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.21          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5182          |
| TimeElapsed     | 2.02e+04      |
| TimestepsSoFar  | 13541376      |
| ev_tdlam_before | 0.361         |
| loss_ent        | 0.8028282     |
| loss_kl         | 0.0029641134  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0045711296 |
| loss_vf_loss    | 0.0002802336  |
-----------------------------------
********** Iteration 3306 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00318 |       0.00000 | 

********** Iteration 3311 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00070 |       0.00000 |       0.00312 |       0.00116 |       0.77137
     -0.00242 |       0.00000 |       0.00256 |       0.00209 |       0.77200
     -0.00376 |       0.00000 |       0.00246 |       0.00182 |       0.77056
     -0.00476 |       0.00000 |       0.00240 |       0.00198 |       0.77066
     -0.00533 |       0.00000 |       0.00236 |       0.00189 |       0.76898
     -0.00599 |       0.00000 |       0.00232 |       0.00231 |       0.76918
     -0.00619 |       0.00000 |       0.00232 |       0.00245 |       0.76886
     -0.00659 |       0.00000 |       0.00230 |       0.00273 |       0.76796
     -0.00684 |       0.00000 |       0.00228 |       0.00295 |       0.76815
     -0.00684 |       0.00000 |       0.00227 |       0.00301 |       0.76720
Evaluating losses...
     -0.00734 |       0.00000 |       0.00224 |       0.00322 |     

     -0.00470 |       0.00000 |       0.00185 |       0.00149 |       0.80058
     -0.00501 |       0.00000 |       0.00184 |       0.00154 |       0.80136
     -0.00519 |       0.00000 |       0.00184 |       0.00152 |       0.80128
Evaluating losses...
     -0.00549 |       0.00000 |       0.00180 |       0.00155 |       0.80101
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.23         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 5197         |
| TimeElapsed     | 2.03e+04     |
| TimestepsSoFar  | 13586432     |
| ev_tdlam_before | 0.73         |
| loss_ent        | 0.80100596   |
| loss_kl         | 0.0015521129 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.005493914 |
| loss_vf_loss    | 0.0018043743 |
----------------------------------
********** Iteration 3317 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00022 |       0.00000 |       0.00319 

********** Iteration 3322 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00078 |       0.00000 |       0.00340 |       0.00064 |       0.75821
     -0.00379 |       0.00000 |       0.00301 |       0.00129 |       0.75897
     -0.00466 |       0.00000 |       0.00288 |       0.00144 |       0.75745
     -0.00490 |       0.00000 |       0.00280 |       0.00182 |       0.75871
     -0.00544 |       0.00000 |       0.00278 |       0.00171 |       0.75775
     -0.00576 |       0.00000 |       0.00271 |       0.00252 |       0.75947
     -0.00606 |       0.00000 |       0.00267 |       0.00224 |       0.75912
     -0.00605 |       0.00000 |       0.00263 |       0.00211 |       0.75870
     -0.00638 |       0.00000 |       0.00263 |       0.00242 |       0.75895
     -0.00666 |       0.00000 |       0.00257 |       0.00225 |       0.75750
Evaluating losses...
     -0.00686 |       0.00000 |       0.00253 |       0.00231 |     

     -0.00464 |       0.00000 |       0.00160 |       0.00096 |       0.77319
     -0.00475 |       0.00000 |       0.00157 |       0.00094 |       0.77328
Evaluating losses...
     -0.00493 |       0.00000 |       0.00152 |       0.00090 |       0.77343
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.22          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5212          |
| TimeElapsed     | 2.03e+04      |
| TimestepsSoFar  | 13631488      |
| ev_tdlam_before | 0.388         |
| loss_ent        | 0.77343094    |
| loss_kl         | 0.0008989614  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0049277577 |
| loss_vf_loss    | 0.0015234165  |
-----------------------------------
********** Iteration 3328 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00043 |       0.00000 |       0.00137 |       0.00036 |       0.76043
     -0.00230 |       0.00000 | 

********** Iteration 3333 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00309 |       0.00000 |       0.00176 |       0.00069 |       0.78418
     -0.00022 |       0.00000 |       0.00090 |       0.00124 |       0.78432
     -0.00177 |       0.00000 |       0.00070 |       0.00115 |       0.78481
     -0.00333 |       0.00000 |       0.00060 |       0.00117 |       0.78578
     -0.00463 |       0.00000 |       0.00053 |       0.00128 |       0.78639
     -0.00460 |       0.00000 |       0.00047 |       0.00144 |       0.78520
     -0.00509 |       0.00000 |       0.00045 |       0.00187 |       0.78573
     -0.00470 |       0.00000 |       0.00042 |       0.00169 |       0.78640
     -0.00551 |       0.00000 |       0.00040 |       0.00183 |       0.78562
     -0.00557 |       0.00000 |       0.00039 |       0.00210 |       0.78682
Evaluating losses...
     -0.00574 |       0.00000 |       0.00038 |       0.00226 |     

     -0.00587 |       0.00000 |       0.00364 |       0.00306 |       0.79110
     -0.00616 |       0.00000 |       0.00351 |       0.00361 |       0.79121
     -0.00628 |       0.00000 |       0.00346 |       0.00314 |       0.79092
Evaluating losses...
     -0.00671 |       0.00000 |       0.00335 |       0.00291 |       0.79079
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.21          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5227          |
| TimeElapsed     | 2.04e+04      |
| TimestepsSoFar  | 13676544      |
| ev_tdlam_before | 0.493         |
| loss_ent        | 0.7907902     |
| loss_kl         | 0.0029148364  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0067130304 |
| loss_vf_loss    | 0.0033533736  |
-----------------------------------
********** Iteration 3339 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00079 |       0.00000 | 

********** Iteration 3344 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00015 |       0.00000 |       0.00070 |       0.00067 |       0.75741
     -0.00233 |       0.00000 |       0.00036 |       0.00083 |       0.75558
     -0.00293 |       0.00000 |       0.00029 |       0.00082 |       0.75504
     -0.00362 |       0.00000 |       0.00025 |       0.00095 |       0.75498
     -0.00362 |       0.00000 |       0.00022 |       0.00106 |       0.75464
     -0.00389 |       0.00000 |       0.00020 |       0.00156 |       0.75630
     -0.00402 |       0.00000 |       0.00019 |       0.00162 |       0.75461
     -0.00439 |       0.00000 |       0.00018 |       0.00152 |       0.75427
     -0.00442 |       0.00000 |       0.00017 |       0.00147 |       0.75536
     -0.00449 |       0.00000 |       0.00017 |       0.00148 |       0.75500
Evaluating losses...
     -0.00479 |       0.00000 |       0.00017 |       0.00149 |     

     -0.00462 |       0.00000 |       0.00167 |       0.00161 |       0.75859
     -0.00481 |       0.00000 |       0.00166 |       0.00157 |       0.75841
     -0.00503 |       0.00000 |       0.00165 |       0.00172 |       0.75963
Evaluating losses...
     -0.00566 |       0.00000 |       0.00162 |       0.00180 |       0.75946
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.13         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 5242         |
| TimeElapsed     | 2.04e+04     |
| TimestepsSoFar  | 13721600     |
| ev_tdlam_before | 0.487        |
| loss_ent        | 0.7594551    |
| loss_kl         | 0.0018035943 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.005663779 |
| loss_vf_loss    | 0.0016176571 |
----------------------------------
********** Iteration 3350 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00061 |       0.00000 |       0.00016 

********** Iteration 3355 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00115 |       0.00000 |       0.00399 |       0.00049 |       0.81088
     -0.00217 |       0.00000 |       0.00316 |       0.00091 |       0.80969
     -0.00359 |       0.00000 |       0.00242 |       0.00092 |       0.81126
     -0.00406 |       0.00000 |       0.00218 |       0.00107 |       0.81067
     -0.00490 |       0.00000 |       0.00206 |       0.00106 |       0.81118
     -0.00546 |       0.00000 |       0.00197 |       0.00112 |       0.81078
     -0.00590 |       0.00000 |       0.00188 |       0.00124 |       0.81092
     -0.00606 |       0.00000 |       0.00182 |       0.00132 |       0.81189
     -0.00622 |       0.00000 |       0.00177 |       0.00140 |       0.81146
     -0.00619 |       0.00000 |       0.00172 |       0.00151 |       0.81160
Evaluating losses...
     -0.00671 |       0.00000 |       0.00169 |       0.00156 |     

     -0.00342 |       0.00000 |       0.00049 |       0.00086 |       0.78484
     -0.00230 |       0.00000 |       0.00048 |       0.00092 |       0.78522
     -0.00347 |       0.00000 |       0.00046 |       0.00106 |       0.78439
     -0.00370 |       0.00000 |       0.00045 |       0.00101 |       0.78498
Evaluating losses...
     -0.00494 |       0.00000 |       0.00043 |       0.00106 |       0.78533
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.18          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5257          |
| TimeElapsed     | 2.06e+04      |
| TimestepsSoFar  | 13766656      |
| ev_tdlam_before | 0.676         |
| loss_ent        | 0.7853316     |
| loss_kl         | 0.0010640667  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0049379934 |
| loss_vf_loss    | 0.00042728506 |
-----------------------------------
********** Iteration 3361 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3366 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00101 |       0.00000 |       0.00307 |       0.00409 |       0.73790
     -0.00259 |       0.00000 |       0.00265 |       0.00741 |       0.74065
     -0.00350 |       0.00000 |       0.00252 |       0.00599 |       0.73981
     -0.00387 |       0.00000 |       0.00241 |       0.00417 |       0.73904
     -0.00418 |       0.00000 |       0.00228 |       0.00276 |       0.73782
     -0.00412 |       0.00000 |       0.00191 |       0.00454 |       0.73846
     -0.00414 |       0.00000 |       0.00156 |       0.00432 |       0.74009
     -0.00441 |       0.00000 |       0.00139 |       0.00546 |       0.74065
     -0.00476 |       0.00000 |       0.00130 |       0.00647 |       0.74021
     -0.00495 |       0.00000 |       0.00123 |       0.00448 |       0.73895
Evaluating losses...
     -0.00529 |       0.00000 |       0.00116 |       0.00373 |     

     -0.00609 |       0.00000 |       0.00106 |       0.00121 |       0.78332
     -0.00632 |       0.00000 |       0.00104 |       0.00122 |       0.78305
     -0.00668 |       0.00000 |       0.00099 |       0.00128 |       0.78286
Evaluating losses...
     -0.00701 |       0.00000 |       0.00099 |       0.00131 |       0.78303
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.18          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5272          |
| TimeElapsed     | 2.06e+04      |
| TimestepsSoFar  | 13811712      |
| ev_tdlam_before | 0.609         |
| loss_ent        | 0.7830255     |
| loss_kl         | 0.0013056913  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0070135994 |
| loss_vf_loss    | 0.0009860825  |
-----------------------------------
********** Iteration 3372 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00127 |       0.00000 | 

********** Iteration 3377 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00075 |       0.00000 |       0.00131 |       0.00032 |       0.76506
     -0.00230 |       0.00000 |       0.00108 |       0.00050 |       0.76458
     -0.00365 |       0.00000 |       0.00096 |       0.00066 |       0.76457
     -0.00434 |       0.00000 |       0.00091 |       0.00070 |       0.76442
     -0.00500 |       0.00000 |       0.00087 |       0.00072 |       0.76476
     -0.00516 |       0.00000 |       0.00085 |       0.00083 |       0.76507
     -0.00563 |       0.00000 |       0.00084 |       0.00086 |       0.76421
     -0.00569 |       0.00000 |       0.00083 |       0.00100 |       0.76540
     -0.00591 |       0.00000 |       0.00080 |       0.00107 |       0.76441
     -0.00633 |       0.00000 |       0.00079 |       0.00113 |       0.76494
Evaluating losses...
     -0.00710 |       0.00000 |       0.00075 |       0.00112 |     

     -0.00442 |       0.00000 |       0.00014 |       0.00106 |       0.77657
     -0.00467 |       0.00000 |       0.00014 |       0.00105 |       0.77626
     -0.00483 |       0.00000 |       0.00013 |       0.00106 |       0.77661
Evaluating losses...
     -0.00514 |       0.00000 |       0.00013 |       0.00106 |       0.77629
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.29          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5287          |
| TimeElapsed     | 2.07e+04      |
| TimestepsSoFar  | 13856768      |
| ev_tdlam_before | -1.03         |
| loss_ent        | 0.77628875    |
| loss_kl         | 0.0010580263  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0051390165 |
| loss_vf_loss    | 0.0001295985  |
-----------------------------------
********** Iteration 3383 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00059 |       0.00000 | 

********** Iteration 3388 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00047 |       0.00000 |       0.00012 |       0.00031 |       0.76879
     -0.00242 |       0.00000 |      9.07e-05 |       0.00053 |       0.76779
     -0.00407 |       0.00000 |      7.44e-05 |       0.00082 |       0.76753
     -0.00483 |       0.00000 |      6.40e-05 |       0.00094 |       0.76786
     -0.00546 |       0.00000 |      5.72e-05 |       0.00102 |       0.76780
     -0.00593 |       0.00000 |      5.20e-05 |       0.00138 |       0.76662
     -0.00646 |       0.00000 |      4.78e-05 |       0.00133 |       0.76743
     -0.00661 |       0.00000 |      4.49e-05 |       0.00140 |       0.76737
     -0.00710 |       0.00000 |      4.21e-05 |       0.00151 |       0.76829
     -0.00730 |       0.00000 |      4.00e-05 |       0.00154 |       0.76750
Evaluating losses...
     -0.00796 |       0.00000 |      3.84e-05 |       0.00148 |     

     -0.00341 |       0.00000 |       0.00059 |       0.00108 |       0.77187
     -0.00362 |       0.00000 |       0.00058 |       0.00109 |       0.77070
     -0.00387 |       0.00000 |       0.00054 |       0.00114 |       0.77129
Evaluating losses...
     -0.00447 |       0.00000 |       0.00052 |       0.00114 |       0.77123
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.29          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5302          |
| TimeElapsed     | 2.07e+04      |
| TimestepsSoFar  | 13901824      |
| ev_tdlam_before | 0.727         |
| loss_ent        | 0.7712343     |
| loss_kl         | 0.0011385867  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0044715633 |
| loss_vf_loss    | 0.0005163819  |
-----------------------------------
********** Iteration 3394 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00228 |       0.00000 | 

********** Iteration 3399 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00033 |       0.00000 |       0.00040 |       0.00051 |       0.79526
     -0.00297 |       0.00000 |       0.00027 |       0.00071 |       0.79615
     -0.00468 |       0.00000 |       0.00021 |       0.00079 |       0.79653
     -0.00557 |       0.00000 |       0.00019 |       0.00094 |       0.79637
     -0.00624 |       0.00000 |       0.00017 |       0.00090 |       0.79684
     -0.00645 |       0.00000 |       0.00015 |       0.00110 |       0.79605
     -0.00688 |       0.00000 |       0.00014 |       0.00120 |       0.79609
     -0.00723 |       0.00000 |       0.00014 |       0.00117 |       0.79668
     -0.00771 |       0.00000 |       0.00013 |       0.00132 |       0.79622
     -0.00781 |       0.00000 |       0.00012 |       0.00136 |       0.79637
Evaluating losses...
     -0.00853 |       0.00000 |       0.00012 |       0.00127 |     

     -0.00671 |       0.00000 |       0.00018 |       0.00101 |       0.78948
     -0.00647 |       0.00000 |       0.00017 |       0.00104 |       0.78883
     -0.00700 |       0.00000 |       0.00017 |       0.00113 |       0.78948
Evaluating losses...
     -0.00751 |       0.00000 |       0.00016 |       0.00110 |       0.78920
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.31          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5317          |
| TimeElapsed     | 2.08e+04      |
| TimestepsSoFar  | 13946880      |
| ev_tdlam_before | -0.141        |
| loss_ent        | 0.78920484    |
| loss_kl         | 0.0011038063  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.007507651  |
| loss_vf_loss    | 0.00016067376 |
-----------------------------------
********** Iteration 3405 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00056 |       0.00000 | 

********** Iteration 3410 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |       0.00000 |       0.00191 |       0.00023 |       0.77609
     -0.00258 |       0.00000 |       0.00139 |       0.00048 |       0.77625
     -0.00305 |       0.00000 |       0.00111 |       0.00066 |       0.77566
     -0.00339 |       0.00000 |       0.00100 |       0.00072 |       0.77705
     -0.00409 |       0.00000 |       0.00093 |       0.00078 |       0.77617
     -0.00402 |       0.00000 |       0.00088 |       0.00079 |       0.77644
     -0.00423 |       0.00000 |       0.00083 |       0.00092 |       0.77694
     -0.00461 |       0.00000 |       0.00081 |       0.00092 |       0.77673
     -0.00436 |       0.00000 |       0.00076 |       0.00092 |       0.77641
     -0.00461 |       0.00000 |       0.00073 |       0.00104 |       0.77670
Evaluating losses...
     -0.00505 |       0.00000 |       0.00069 |       0.00111 |     

     -0.00556 |       0.00000 |       0.00166 |       0.00159 |       0.78954
     -0.00554 |       0.00000 |       0.00160 |       0.00172 |       0.78898
     -0.00552 |       0.00000 |       0.00158 |       0.00179 |       0.79043
Evaluating losses...
     -0.00641 |       0.00000 |       0.00150 |       0.00173 |       0.78933
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.31          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5332          |
| TimeElapsed     | 2.08e+04      |
| TimestepsSoFar  | 13991936      |
| ev_tdlam_before | 0.442         |
| loss_ent        | 0.7893264     |
| loss_kl         | 0.0017292416  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0064090467 |
| loss_vf_loss    | 0.0015027692  |
-----------------------------------
********** Iteration 3416 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00060 |       0.00000 | 

********** Iteration 3421 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00044 |       0.00000 |       0.00365 |       0.00039 |       0.76131
     -0.00277 |       0.00000 |       0.00311 |       0.00055 |       0.76191
     -0.00427 |       0.00000 |       0.00273 |       0.00059 |       0.76137
     -0.00462 |       0.00000 |       0.00223 |       0.00078 |       0.76117
     -0.00514 |       0.00000 |       0.00182 |       0.00082 |       0.76098
     -0.00553 |       0.00000 |       0.00161 |       0.00085 |       0.76103
     -0.00574 |       0.00000 |       0.00150 |       0.00087 |       0.76142
     -0.00602 |       0.00000 |       0.00144 |       0.00095 |       0.76079
     -0.00603 |       0.00000 |       0.00139 |       0.00092 |       0.76101
     -0.00603 |       0.00000 |       0.00137 |       0.00109 |       0.76118
Evaluating losses...
     -0.00654 |       0.00000 |       0.00131 |       0.00105 |     

     -0.00499 |       0.00000 |      9.04e-05 |       0.00100 |       0.76566
     -0.00543 |       0.00000 |      8.79e-05 |       0.00114 |       0.76436
     -0.00584 |       0.00000 |      8.64e-05 |       0.00111 |       0.76521
Evaluating losses...
     -0.00655 |       0.00000 |      8.49e-05 |       0.00116 |       0.76545
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.35          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5347          |
| TimeElapsed     | 2.1e+04       |
| TimestepsSoFar  | 14036992      |
| ev_tdlam_before | -1.06         |
| loss_ent        | 0.76544845    |
| loss_kl         | 0.0011577258  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0065481355 |
| loss_vf_loss    | 8.4929066e-05 |
-----------------------------------
********** Iteration 3427 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00161 |       0.00000 | 

********** Iteration 3432 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00084 |       0.00000 |       0.00031 |       0.00054 |       0.81352
     -0.00239 |       0.00000 |       0.00020 |       0.00079 |       0.81276
     -0.00451 |       0.00000 |       0.00015 |       0.00088 |       0.81336
     -0.00571 |       0.00000 |       0.00012 |       0.00094 |       0.81356
     -0.00669 |       0.00000 |       0.00011 |       0.00106 |       0.81319
     -0.00673 |       0.00000 |      9.87e-05 |       0.00125 |       0.81349
     -0.00785 |       0.00000 |      9.04e-05 |       0.00133 |       0.81345
     -0.00728 |       0.00000 |      8.68e-05 |       0.00162 |       0.81353
     -0.00838 |       0.00000 |      8.33e-05 |       0.00155 |       0.81397
     -0.00907 |       0.00000 |      8.02e-05 |       0.00157 |       0.81355
Evaluating losses...
     -0.00997 |       0.00000 |      7.90e-05 |       0.00157 |     

     -0.00530 |       0.00000 |       0.00209 |       0.00147 |       0.76029
     -0.00495 |       0.00000 |       0.00204 |       0.00167 |       0.76056
     -0.00551 |       0.00000 |       0.00201 |       0.00174 |       0.75993
Evaluating losses...
     -0.00609 |       0.00000 |       0.00193 |       0.00169 |       0.76114
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.32         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 5362         |
| TimeElapsed     | 2.1e+04      |
| TimestepsSoFar  | 14082048     |
| ev_tdlam_before | 0.532        |
| loss_ent        | 0.7611382    |
| loss_kl         | 0.0016903193 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.006085395 |
| loss_vf_loss    | 0.0019337018 |
----------------------------------
********** Iteration 3438 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00138 |       0.00000 |       0.00038 

********** Iteration 3443 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 |      7.13e-05 |       0.00027 |       0.76890
     -0.00208 |       0.00000 |      5.25e-05 |       0.00064 |       0.77001
     -0.00365 |       0.00000 |      4.59e-05 |       0.00079 |       0.77036
     -0.00446 |       0.00000 |      4.29e-05 |       0.00088 |       0.77004
     -0.00535 |       0.00000 |      3.99e-05 |       0.00097 |       0.76971
     -0.00578 |       0.00000 |      3.85e-05 |       0.00107 |       0.77035
     -0.00600 |       0.00000 |      3.68e-05 |       0.00111 |       0.77005
     -0.00654 |       0.00000 |      3.54e-05 |       0.00120 |       0.77106
     -0.00648 |       0.00000 |      3.41e-05 |       0.00136 |       0.77046
     -0.00699 |       0.00000 |      3.36e-05 |       0.00140 |       0.77078
Evaluating losses...
     -0.00772 |       0.00000 |      3.25e-05 |       0.00139 |     

     -0.00740 |       0.00000 |      7.00e-05 |       0.00124 |       0.77289
     -0.00766 |       0.00000 |      6.60e-05 |       0.00134 |       0.77260
     -0.00798 |       0.00000 |      6.34e-05 |       0.00140 |       0.77207
Evaluating losses...
     -0.00867 |       0.00000 |      6.21e-05 |       0.00135 |       0.77282
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.33          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5377          |
| TimeElapsed     | 2.11e+04      |
| TimestepsSoFar  | 14127104      |
| ev_tdlam_before | -2.06         |
| loss_ent        | 0.7728193     |
| loss_kl         | 0.0013493235  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.00867027   |
| loss_vf_loss    | 6.2092346e-05 |
-----------------------------------
********** Iteration 3449 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00101 |       0.00000 | 

********** Iteration 3454 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00078 |       0.00000 |       0.00108 |       0.00027 |       0.77796
     -0.00159 |       0.00000 |       0.00056 |       0.00039 |       0.77765
     -0.00274 |       0.00000 |       0.00050 |       0.00060 |       0.77753
     -0.00289 |       0.00000 |       0.00046 |       0.00064 |       0.77792
     -0.00354 |       0.00000 |       0.00048 |       0.00070 |       0.77738
     -0.00391 |       0.00000 |       0.00048 |       0.00075 |       0.77755
     -0.00363 |       0.00000 |       0.00045 |       0.00084 |       0.77739
     -0.00394 |       0.00000 |       0.00045 |       0.00081 |       0.77728
     -0.00426 |       0.00000 |       0.00046 |       0.00085 |       0.77774
     -0.00386 |       0.00000 |       0.00044 |       0.00086 |       0.77801
Evaluating losses...
     -0.00437 |       0.00000 |       0.00043 |       0.00092 |     

     -0.00404 |       0.00000 |       0.00034 |       0.00105 |       0.77984
     -0.00418 |       0.00000 |       0.00033 |       0.00101 |       0.78119
     -0.00464 |       0.00000 |       0.00032 |       0.00115 |       0.78024
Evaluating losses...
     -0.00505 |       0.00000 |       0.00030 |       0.00126 |       0.77943
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.26          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5392          |
| TimeElapsed     | 2.11e+04      |
| TimestepsSoFar  | 14172160      |
| ev_tdlam_before | 0.388         |
| loss_ent        | 0.7794278     |
| loss_kl         | 0.0012599807  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0050503365 |
| loss_vf_loss    | 0.0003036326  |
-----------------------------------
********** Iteration 3460 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00056 |       0.00000 | 

********** Iteration 3465 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00134 |       0.00000 |       0.00336 |       0.00082 |       0.77716
     -0.00210 |       0.00000 |       0.00283 |       0.00111 |       0.77493
     -0.00295 |       0.00000 |       0.00248 |       0.00097 |       0.77406
     -0.00360 |       0.00000 |       0.00228 |       0.00129 |       0.77469
     -0.00357 |       0.00000 |       0.00216 |       0.00134 |       0.77356
     -0.00413 |       0.00000 |       0.00204 |       0.00128 |       0.77495
     -0.00470 |       0.00000 |       0.00195 |       0.00135 |       0.77374
     -0.00479 |       0.00000 |       0.00186 |       0.00159 |       0.77496
     -0.00470 |       0.00000 |       0.00181 |       0.00166 |       0.77459
     -0.00525 |       0.00000 |       0.00175 |       0.00201 |       0.77391
Evaluating losses...
     -0.00552 |       0.00000 |       0.00169 |       0.00239 |     

     -0.00361 |       0.00000 |       0.00053 |       0.00080 |       0.80550
     -0.00387 |       0.00000 |       0.00050 |       0.00082 |       0.80543
     -0.00390 |       0.00000 |       0.00049 |       0.00094 |       0.80474
Evaluating losses...
     -0.00403 |       0.00000 |       0.00047 |       0.00093 |       0.80500
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.22          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5407          |
| TimeElapsed     | 2.11e+04      |
| TimestepsSoFar  | 14217216      |
| ev_tdlam_before | 0.19          |
| loss_ent        | 0.8050032     |
| loss_kl         | 0.0009331249  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.004029465  |
| loss_vf_loss    | 0.00046620448 |
-----------------------------------
********** Iteration 3471 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00059 |       0.00000 | 

********** Iteration 3476 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00098 |       0.00000 |       0.00137 |       0.00080 |       0.79445
     -0.00218 |       0.00000 |       0.00090 |       0.00067 |       0.79446
     -0.00280 |       0.00000 |       0.00082 |       0.00071 |       0.79413
     -0.00373 |       0.00000 |       0.00078 |       0.00079 |       0.79359
     -0.00420 |       0.00000 |       0.00076 |       0.00096 |       0.79365
     -0.00399 |       0.00000 |       0.00074 |       0.00144 |       0.79390
     -0.00502 |       0.00000 |       0.00073 |       0.00130 |       0.79400
     -0.00505 |       0.00000 |       0.00072 |       0.00125 |       0.79424
     -0.00498 |       0.00000 |       0.00070 |       0.00161 |       0.79414
     -0.00560 |       0.00000 |       0.00070 |       0.00143 |       0.79291
Evaluating losses...
     -0.00621 |       0.00000 |       0.00068 |       0.00151 |     

     -0.00766 |       0.00000 |       0.00023 |       0.00125 |       0.79285
     -0.00817 |       0.00000 |       0.00022 |       0.00128 |       0.79215
     -0.00851 |       0.00000 |       0.00022 |       0.00139 |       0.79159
     -0.00875 |       0.00000 |       0.00021 |       0.00144 |       0.79183
Evaluating losses...
     -0.00942 |       0.00000 |       0.00021 |       0.00147 |       0.79126
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.25          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5422          |
| TimeElapsed     | 2.13e+04      |
| TimestepsSoFar  | 14262272      |
| ev_tdlam_before | -0.625        |
| loss_ent        | 0.79126114    |
| loss_kl         | 0.0014693963  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.009423843  |
| loss_vf_loss    | 0.00020613775 |
-----------------------------------
********** Iteration 3482 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3487 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00051 |       0.00000 |       0.00377 |       0.00070 |       0.79130
     -0.00268 |       0.00000 |       0.00351 |       0.00114 |       0.79086
     -0.00361 |       0.00000 |       0.00340 |       0.00114 |       0.79064
     -0.00429 |       0.00000 |       0.00329 |       0.00099 |       0.79128
     -0.00476 |       0.00000 |       0.00322 |       0.00105 |       0.79176
     -0.00475 |       0.00000 |       0.00317 |       0.00131 |       0.79109
     -0.00516 |       0.00000 |       0.00312 |       0.00163 |       0.79175
     -0.00530 |       0.00000 |       0.00308 |       0.00160 |       0.79164
     -0.00554 |       0.00000 |       0.00305 |       0.00151 |       0.79187
     -0.00568 |       0.00000 |       0.00303 |       0.00166 |       0.79046
Evaluating losses...
     -0.00601 |       0.00000 |       0.00301 |       0.00161 |     

     -0.00504 |       0.00000 |       0.00060 |       0.00096 |       0.77642
     -0.00513 |       0.00000 |       0.00056 |       0.00101 |       0.77742
     -0.00509 |       0.00000 |       0.00054 |       0.00106 |       0.77690
Evaluating losses...
     -0.00577 |       0.00000 |       0.00051 |       0.00104 |       0.77694
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.18          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5437          |
| TimeElapsed     | 2.14e+04      |
| TimestepsSoFar  | 14307328      |
| ev_tdlam_before | 0.155         |
| loss_ent        | 0.7769372     |
| loss_kl         | 0.0010442906  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0057735876 |
| loss_vf_loss    | 0.0005112142  |
-----------------------------------
********** Iteration 3493 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     1.59e-06 |       0.00000 | 

********** Iteration 3498 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00115 |       0.00000 |       0.00520 |       0.00030 |       0.74394
     -0.00227 |       0.00000 |       0.00401 |       0.00056 |       0.74470
     -0.00297 |       0.00000 |       0.00369 |       0.00068 |       0.74441
     -0.00409 |       0.00000 |       0.00346 |       0.00077 |       0.74459
     -0.00422 |       0.00000 |       0.00336 |       0.00083 |       0.74452
     -0.00501 |       0.00000 |       0.00328 |       0.00094 |       0.74417
     -0.00567 |       0.00000 |       0.00321 |       0.00085 |       0.74440
     -0.00585 |       0.00000 |       0.00316 |       0.00094 |       0.74385
     -0.00586 |       0.00000 |       0.00311 |       0.00098 |       0.74454
     -0.00591 |       0.00000 |       0.00303 |       0.00100 |       0.74356
Evaluating losses...
     -0.00711 |       0.00000 |       0.00299 |       0.00098 |     

     -0.00636 |       0.00000 |       0.00101 |       0.00098 |       0.78191
     -0.00642 |       0.00000 |       0.00099 |       0.00104 |       0.78182
     -0.00655 |       0.00000 |       0.00099 |       0.00109 |       0.78225
Evaluating losses...
     -0.00710 |       0.00000 |       0.00095 |       0.00113 |       0.78238
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.22          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5452          |
| TimeElapsed     | 2.14e+04      |
| TimestepsSoFar  | 14352384      |
| ev_tdlam_before | 0.498         |
| loss_ent        | 0.7823762     |
| loss_kl         | 0.0011281575  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0070978743 |
| loss_vf_loss    | 0.0009540257  |
-----------------------------------
********** Iteration 3504 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00070 |       0.00000 | 

********** Iteration 3509 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00085 |       0.00000 |       0.00011 |       0.00038 |       0.73621
     -0.00182 |       0.00000 |      8.73e-05 |       0.00057 |       0.73846
     -0.00270 |       0.00000 |      7.45e-05 |       0.00086 |       0.73717
     -0.00343 |       0.00000 |      6.39e-05 |       0.00090 |       0.73783
     -0.00417 |       0.00000 |      5.69e-05 |       0.00082 |       0.73703
     -0.00471 |       0.00000 |      5.13e-05 |       0.00076 |       0.73633
     -0.00511 |       0.00000 |      4.68e-05 |       0.00086 |       0.73645
     -0.00538 |       0.00000 |      4.44e-05 |       0.00088 |       0.73549
     -0.00552 |       0.00000 |      4.08e-05 |       0.00095 |       0.73586
     -0.00543 |       0.00000 |      3.81e-05 |       0.00120 |       0.73723
Evaluating losses...
     -0.00584 |       0.00000 |      3.70e-05 |       0.00130 |     

     -0.00737 |       0.00000 |      8.22e-05 |       0.00134 |       0.78199
     -0.00749 |       0.00000 |      7.89e-05 |       0.00152 |       0.78156
     -0.00785 |       0.00000 |      7.43e-05 |       0.00155 |       0.78207
Evaluating losses...
     -0.00871 |       0.00000 |      7.19e-05 |       0.00154 |       0.78293
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.2          |
| EpThisIter      | 2            |
| EpisodesSoFar   | 5467         |
| TimeElapsed     | 2.15e+04     |
| TimestepsSoFar  | 14397440     |
| ev_tdlam_before | -0.858       |
| loss_ent        | 0.7829336    |
| loss_kl         | 0.0015355683 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.008709753 |
| loss_vf_loss    | 7.185682e-05 |
----------------------------------
********** Iteration 3515 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00066 |       0.00000 |       0.00371 

********** Iteration 3520 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00149 |       0.00000 |       0.00243 |       0.00296 |       0.76465
     -0.00236 |       0.00000 |       0.00203 |       0.00166 |       0.76606
     -0.00331 |       0.00000 |       0.00196 |       0.00154 |       0.76637
     -0.00432 |       0.00000 |       0.00190 |       0.00148 |       0.76632
     -0.00450 |       0.00000 |       0.00187 |       0.00146 |       0.76734
     -0.00532 |       0.00000 |       0.00186 |       0.00118 |       0.76690
     -0.00554 |       0.00000 |       0.00183 |       0.00150 |       0.76739
     -0.00568 |       0.00000 |       0.00182 |       0.00134 |       0.76735
     -0.00624 |       0.00000 |       0.00180 |       0.00144 |       0.76717
     -0.00596 |       0.00000 |       0.00178 |       0.00146 |       0.76765
Evaluating losses...
     -0.00684 |       0.00000 |       0.00175 |       0.00167 |     

     -0.00556 |       0.00000 |      4.59e-05 |       0.00091 |       0.75851
     -0.00571 |       0.00000 |      4.44e-05 |       0.00107 |       0.75815
     -0.00577 |       0.00000 |      4.25e-05 |       0.00108 |       0.75941
Evaluating losses...
     -0.00651 |       0.00000 |      4.15e-05 |       0.00115 |       0.75840
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.25          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5482          |
| TimeElapsed     | 2.15e+04      |
| TimestepsSoFar  | 14442496      |
| ev_tdlam_before | -1.38         |
| loss_ent        | 0.75839734    |
| loss_kl         | 0.0011536123  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.006505345  |
| loss_vf_loss    | 4.1470612e-05 |
-----------------------------------
********** Iteration 3526 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00070 |       0.00000 | 

********** Iteration 3531 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00073 |       0.00000 |       0.00105 |       0.00025 |       0.76558
     -0.00135 |       0.00000 |       0.00053 |       0.00027 |       0.76495
     -0.00249 |       0.00000 |       0.00043 |       0.00045 |       0.76440
     -0.00239 |       0.00000 |       0.00037 |       0.00049 |       0.76466
     -0.00320 |       0.00000 |       0.00034 |       0.00061 |       0.76522
     -0.00334 |       0.00000 |       0.00032 |       0.00068 |       0.76500
     -0.00395 |       0.00000 |       0.00031 |       0.00075 |       0.76497
     -0.00399 |       0.00000 |       0.00030 |       0.00079 |       0.76507
     -0.00414 |       0.00000 |       0.00029 |       0.00088 |       0.76520
     -0.00452 |       0.00000 |       0.00028 |       0.00095 |       0.76576
Evaluating losses...
     -0.00518 |       0.00000 |       0.00026 |       0.00093 |     

     -0.00608 |       0.00000 |       0.00016 |       0.00122 |       0.75056
     -0.00646 |       0.00000 |       0.00016 |       0.00120 |       0.75096
     -0.00695 |       0.00000 |       0.00016 |       0.00127 |       0.75029
Evaluating losses...
     -0.00749 |       0.00000 |       0.00015 |       0.00146 |       0.75038
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.26          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5497          |
| TimeElapsed     | 2.16e+04      |
| TimestepsSoFar  | 14487552      |
| ev_tdlam_before | 0.236         |
| loss_ent        | 0.75037813    |
| loss_kl         | 0.0014550555  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.007494589  |
| loss_vf_loss    | 0.00015369468 |
-----------------------------------
********** Iteration 3537 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00028 |       0.00000 | 

********** Iteration 3542 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00012 |       0.00000 |       0.00121 |       0.00031 |       0.75010
     -0.00234 |       0.00000 |       0.00047 |       0.00043 |       0.74938
     -0.00334 |       0.00000 |       0.00041 |       0.00050 |       0.75112
     -0.00403 |       0.00000 |       0.00039 |       0.00059 |       0.75208
     -0.00403 |       0.00000 |       0.00037 |       0.00060 |       0.75303
     -0.00464 |       0.00000 |       0.00036 |       0.00074 |       0.75273
     -0.00472 |       0.00000 |       0.00035 |       0.00072 |       0.75347
     -0.00486 |       0.00000 |       0.00033 |       0.00075 |       0.75347
     -0.00495 |       0.00000 |       0.00032 |       0.00085 |       0.75436
     -0.00509 |       0.00000 |       0.00032 |       0.00085 |       0.75408
Evaluating losses...
     -0.00525 |       0.00000 |       0.00030 |       0.00089 |     

     -0.00439 |       0.00000 |       0.00209 |       0.00088 |       0.73406
     -0.00459 |       0.00000 |       0.00202 |       0.00091 |       0.73430
     -0.00468 |       0.00000 |       0.00198 |       0.00094 |       0.73391
Evaluating losses...
     -0.00511 |       0.00000 |       0.00190 |       0.00100 |       0.73408
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.27         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 5511         |
| TimeElapsed     | 2.18e+04     |
| TimestepsSoFar  | 14532608     |
| ev_tdlam_before | 0.691        |
| loss_ent        | 0.7340768    |
| loss_kl         | 0.0010005711 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.005107599 |
| loss_vf_loss    | 0.0019043166 |
----------------------------------
********** Iteration 3548 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00023 |       0.00000 |       0.00125 

********** Iteration 3553 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00105 |       0.00000 |       0.00272 |       0.00069 |       0.75588
     -0.00282 |       0.00000 |       0.00221 |       0.00126 |       0.75743
     -0.00330 |       0.00000 |       0.00214 |       0.00184 |       0.75804
     -0.00364 |       0.00000 |       0.00204 |       0.00132 |       0.75770
     -0.00437 |       0.00000 |       0.00197 |       0.00152 |       0.75768
     -0.00428 |       0.00000 |       0.00192 |       0.00132 |       0.75794
     -0.00475 |       0.00000 |       0.00186 |       0.00158 |       0.75806
     -0.00484 |       0.00000 |       0.00182 |       0.00145 |       0.75860
     -0.00522 |       0.00000 |       0.00180 |       0.00138 |       0.75806
     -0.00526 |       0.00000 |       0.00174 |       0.00173 |       0.75855
Evaluating losses...
     -0.00549 |       0.00000 |       0.00169 |       0.00198 |     

     -0.00202 |       0.00000 |       0.00036 |       0.00062 |       0.75637
     -0.00256 |       0.00000 |       0.00036 |       0.00067 |       0.75669
     -0.00192 |       0.00000 |       0.00035 |       0.00068 |       0.75608
Evaluating losses...
     -0.00251 |       0.00000 |       0.00035 |       0.00069 |       0.75618
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.25          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5526          |
| TimeElapsed     | 2.19e+04      |
| TimestepsSoFar  | 14577664      |
| ev_tdlam_before | 0.671         |
| loss_ent        | 0.75618374    |
| loss_kl         | 0.0006915712  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0025149218 |
| loss_vf_loss    | 0.0003454836  |
-----------------------------------
********** Iteration 3559 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00068 |       0.00000 | 

********** Iteration 3564 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00011 |       0.00000 |      8.87e-05 |       0.00020 |       0.74716
     -0.00183 |       0.00000 |      4.56e-05 |       0.00038 |       0.74748
     -0.00276 |       0.00000 |      3.49e-05 |       0.00041 |       0.74699
     -0.00338 |       0.00000 |      3.07e-05 |       0.00049 |       0.74679
     -0.00368 |       0.00000 |      2.91e-05 |       0.00054 |       0.74701
     -0.00399 |       0.00000 |      2.65e-05 |       0.00061 |       0.74689
     -0.00423 |       0.00000 |      2.50e-05 |       0.00063 |       0.74642
     -0.00438 |       0.00000 |      2.42e-05 |       0.00065 |       0.74629
     -0.00450 |       0.00000 |      2.31e-05 |       0.00068 |       0.74625
     -0.00469 |       0.00000 |      2.24e-05 |       0.00074 |       0.74635
Evaluating losses...
     -0.00495 |       0.00000 |      2.13e-05 |       0.00075 |     

     -0.00625 |       0.00000 |      1.01e-05 |       0.00116 |       0.75927
     -0.00652 |       0.00000 |      9.37e-06 |       0.00116 |       0.75929
     -0.00667 |       0.00000 |      9.05e-06 |       0.00124 |       0.75910
Evaluating losses...
     -0.00745 |       0.00000 |      8.69e-06 |       0.00123 |       0.75908
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.23          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5541          |
| TimeElapsed     | 2.19e+04      |
| TimestepsSoFar  | 14622720      |
| ev_tdlam_before | -1.45         |
| loss_ent        | 0.7590849     |
| loss_kl         | 0.00123114    |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0074450984 |
| loss_vf_loss    | 8.687048e-06  |
-----------------------------------
********** Iteration 3570 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     8.08e-05 |       0.00000 | 

********** Iteration 3575 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00057 |       0.00000 |      4.81e-05 |       0.00033 |       0.75106
     -0.00266 |       0.00000 |      3.65e-05 |       0.00045 |       0.75154
     -0.00389 |       0.00000 |      3.15e-05 |       0.00067 |       0.75168
     -0.00470 |       0.00000 |      2.83e-05 |       0.00067 |       0.75140
     -0.00527 |       0.00000 |      2.60e-05 |       0.00083 |       0.75237
     -0.00573 |       0.00000 |      2.47e-05 |       0.00092 |       0.75132
     -0.00639 |       0.00000 |      2.30e-05 |       0.00094 |       0.75166
     -0.00650 |       0.00000 |      2.21e-05 |       0.00101 |       0.75168
     -0.00692 |       0.00000 |      2.10e-05 |       0.00105 |       0.75130
     -0.00729 |       0.00000 |      2.05e-05 |       0.00111 |       0.75139
Evaluating losses...
     -0.00808 |       0.00000 |      1.98e-05 |       0.00116 |     

     -0.00348 |       0.00000 |       0.00116 |       0.00071 |       0.75908
     -0.00364 |       0.00000 |       0.00098 |       0.00077 |       0.75934
     -0.00374 |       0.00000 |       0.00087 |       0.00079 |       0.75860
Evaluating losses...
     -0.00399 |       0.00000 |       0.00081 |       0.00084 |       0.75858
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.15          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5556          |
| TimeElapsed     | 2.2e+04       |
| TimestepsSoFar  | 14667776      |
| ev_tdlam_before | 0.168         |
| loss_ent        | 0.7585836     |
| loss_kl         | 0.0008373693  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0039898045 |
| loss_vf_loss    | 0.0008058586  |
-----------------------------------
********** Iteration 3581 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00092 |       0.00000 | 

********** Iteration 3586 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00131 |       0.00000 |       0.00150 |       0.00026 |       0.72915
     -0.00185 |       0.00000 |       0.00136 |       0.00047 |       0.72797
     -0.00219 |       0.00000 |       0.00132 |       0.00070 |       0.72782
     -0.00317 |       0.00000 |       0.00128 |       0.00082 |       0.72785
     -0.00393 |       0.00000 |       0.00124 |       0.00086 |       0.72750
     -0.00437 |       0.00000 |       0.00123 |       0.00082 |       0.72777
     -0.00365 |       0.00000 |       0.00122 |       0.00100 |       0.72750
     -0.00434 |       0.00000 |       0.00119 |       0.00107 |       0.72803
     -0.00431 |       0.00000 |       0.00117 |       0.00118 |       0.72770
     -0.00468 |       0.00000 |       0.00116 |       0.00126 |       0.72746
Evaluating losses...
     -0.00498 |       0.00000 |       0.00116 |       0.00129 |     

     -0.00391 |       0.00000 |       0.00104 |       0.00066 |       0.76279
     -0.00416 |       0.00000 |       0.00103 |       0.00066 |       0.76269
     -0.00432 |       0.00000 |       0.00100 |       0.00072 |       0.76239
Evaluating losses...
     -0.00444 |       0.00000 |       0.00097 |       0.00078 |       0.76246
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.17          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5571          |
| TimeElapsed     | 2.2e+04       |
| TimestepsSoFar  | 14712832      |
| ev_tdlam_before | 0.485         |
| loss_ent        | 0.7624589     |
| loss_kl         | 0.00078315753 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.004444329  |
| loss_vf_loss    | 0.00096961093 |
-----------------------------------
********** Iteration 3592 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00036 |       0.00000 | 

********** Iteration 3597 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00011 |       0.00000 |       0.00090 |       0.00037 |       0.72769
     -0.00248 |       0.00000 |       0.00056 |       0.00050 |       0.72825
     -0.00359 |       0.00000 |       0.00041 |       0.00056 |       0.72775
     -0.00401 |       0.00000 |       0.00032 |       0.00062 |       0.72809
     -0.00460 |       0.00000 |       0.00027 |       0.00075 |       0.72820
     -0.00490 |       0.00000 |       0.00024 |       0.00085 |       0.72900
     -0.00508 |       0.00000 |       0.00021 |       0.00089 |       0.72877
     -0.00535 |       0.00000 |       0.00020 |       0.00103 |       0.72863
     -0.00542 |       0.00000 |       0.00018 |       0.00102 |       0.72877
     -0.00576 |       0.00000 |       0.00017 |       0.00116 |       0.72940
Evaluating losses...
     -0.00631 |       0.00000 |       0.00016 |       0.00110 |     

     -0.00344 |       0.00000 |       0.00051 |       0.00093 |       0.74543
     -0.00393 |       0.00000 |       0.00052 |       0.00095 |       0.74543
     -0.00395 |       0.00000 |       0.00050 |       0.00095 |       0.74514
     -0.00421 |       0.00000 |       0.00049 |       0.00105 |       0.74551
Evaluating losses...
     -0.00457 |       0.00000 |       0.00047 |       0.00109 |       0.74501
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.09          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5586          |
| TimeElapsed     | 2.23e+04      |
| TimestepsSoFar  | 14757888      |
| ev_tdlam_before | 0.72          |
| loss_ent        | 0.7450068     |
| loss_kl         | 0.0010927753  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.004566847  |
| loss_vf_loss    | 0.00047332788 |
-----------------------------------
********** Iteration 3603 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3608 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00051 |       0.00000 |       0.00365 |       0.00030 |       0.73412
     -0.00179 |       0.00000 |       0.00359 |       0.00043 |       0.73299
     -0.00277 |       0.00000 |       0.00356 |       0.00056 |       0.73199
     -0.00337 |       0.00000 |       0.00352 |       0.00065 |       0.73267
     -0.00376 |       0.00000 |       0.00354 |       0.00077 |       0.73247
     -0.00412 |       0.00000 |       0.00350 |       0.00076 |       0.73266
     -0.00447 |       0.00000 |       0.00349 |       0.00089 |       0.73268
     -0.00463 |       0.00000 |       0.00350 |       0.00090 |       0.73338
     -0.00482 |       0.00000 |       0.00347 |       0.00104 |       0.73339
     -0.00500 |       0.00000 |       0.00346 |       0.00102 |       0.73406
Evaluating losses...
     -0.00527 |       0.00000 |       0.00343 |       0.00107 |     

     -0.00456 |       0.00000 |       0.00059 |       0.00061 |       0.75169
     -0.00463 |       0.00000 |       0.00055 |       0.00064 |       0.75152
     -0.00477 |       0.00000 |       0.00051 |       0.00068 |       0.75131
Evaluating losses...
     -0.00505 |       0.00000 |       0.00048 |       0.00067 |       0.75154
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.16          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5601          |
| TimeElapsed     | 2.23e+04      |
| TimestepsSoFar  | 14802944      |
| ev_tdlam_before | -0.154        |
| loss_ent        | 0.7515366     |
| loss_kl         | 0.0006650423  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.005048605  |
| loss_vf_loss    | 0.00048191586 |
-----------------------------------
********** Iteration 3614 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00021 |       0.00000 | 

********** Iteration 3619 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00013 |       0.00000 |       0.00126 |       0.00022 |       0.75736
     -0.00170 |       0.00000 |       0.00072 |       0.00040 |       0.75834
     -0.00238 |       0.00000 |       0.00067 |       0.00037 |       0.75775
     -0.00333 |       0.00000 |       0.00065 |       0.00055 |       0.75841
     -0.00344 |       0.00000 |       0.00063 |       0.00065 |       0.75822
     -0.00380 |       0.00000 |       0.00062 |       0.00075 |       0.75844
     -0.00335 |       0.00000 |       0.00060 |       0.00089 |       0.75826
     -0.00408 |       0.00000 |       0.00059 |       0.00092 |       0.75852
     -0.00421 |       0.00000 |       0.00058 |       0.00103 |       0.75895
     -0.00457 |       0.00000 |       0.00057 |       0.00099 |       0.75790
Evaluating losses...
     -0.00477 |       0.00000 |       0.00056 |       0.00108 |     

     -0.00592 |       0.00000 |      1.73e-05 |       0.00083 |       0.76326
     -0.00646 |       0.00000 |      1.66e-05 |       0.00092 |       0.76316
     -0.00652 |       0.00000 |      1.66e-05 |       0.00096 |       0.76320
     -0.00681 |       0.00000 |      1.58e-05 |       0.00111 |       0.76285
Evaluating losses...
     -0.00728 |       0.00000 |      1.52e-05 |       0.00112 |       0.76275
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.14          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5616          |
| TimeElapsed     | 2.23e+04      |
| TimestepsSoFar  | 14848000      |
| ev_tdlam_before | -0.543        |
| loss_ent        | 0.7627546     |
| loss_kl         | 0.0011174078  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0072777933 |
| loss_vf_loss    | 1.5191069e-05 |
-----------------------------------
********** Iteration 3625 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3630 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00092 |       0.00000 |       0.00071 |       0.00031 |       0.73511
     -0.00124 |       0.00000 |       0.00043 |       0.00047 |       0.73504
     -0.00161 |       0.00000 |       0.00040 |       0.00074 |       0.73521
     -0.00315 |       0.00000 |       0.00037 |       0.00067 |       0.73530
     -0.00320 |       0.00000 |       0.00036 |       0.00066 |       0.73678
     -0.00383 |       0.00000 |       0.00035 |       0.00090 |       0.73642
     -0.00389 |       0.00000 |       0.00034 |       0.00075 |       0.73678
     -0.00371 |       0.00000 |       0.00033 |       0.00085 |       0.73726
     -0.00412 |       0.00000 |       0.00032 |       0.00091 |       0.73724
     -0.00413 |       0.00000 |       0.00032 |       0.00098 |       0.73775
Evaluating losses...
     -0.00445 |       0.00000 |       0.00030 |       0.00157 |     

     -0.00423 |       0.00000 |       0.00096 |       0.00067 |       0.77585
     -0.00429 |       0.00000 |       0.00092 |       0.00071 |       0.77595
     -0.00445 |       0.00000 |       0.00090 |       0.00069 |       0.77572
Evaluating losses...
     -0.00474 |       0.00000 |       0.00086 |       0.00071 |       0.77594
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.19         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 5631         |
| TimeElapsed     | 2.24e+04     |
| TimestepsSoFar  | 14893056     |
| ev_tdlam_before | 0.382        |
| loss_ent        | 0.7759365    |
| loss_kl         | 0.0007133022 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.004744208 |
| loss_vf_loss    | 0.0008613159 |
----------------------------------
********** Iteration 3636 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00013 |       0.00000 |       0.00018 

********** Iteration 3641 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00200 |       0.00000 |       0.00293 |       0.00140 |       0.76768
     -0.00162 |       0.00000 |       0.00237 |       0.00068 |       0.76717
     -0.00259 |       0.00000 |       0.00232 |       0.00072 |       0.76615
     -0.00341 |       0.00000 |       0.00228 |       0.00075 |       0.76592
     -0.00347 |       0.00000 |       0.00228 |       0.00075 |       0.76661
     -0.00394 |       0.00000 |       0.00225 |       0.00079 |       0.76644
     -0.00418 |       0.00000 |       0.00224 |       0.00086 |       0.76595
     -0.00442 |       0.00000 |       0.00222 |       0.00086 |       0.76674
     -0.00471 |       0.00000 |       0.00223 |       0.00090 |       0.76666
     -0.00501 |       0.00000 |       0.00222 |       0.00111 |       0.76680
Evaluating losses...
     -0.00568 |       0.00000 |       0.00219 |       0.00089 |     

     -0.00501 |       0.00000 |       0.00170 |       0.00109 |       0.76875
     -0.00523 |       0.00000 |       0.00167 |       0.00122 |       0.76957
     -0.00548 |       0.00000 |       0.00166 |       0.00126 |       0.76873
Evaluating losses...
     -0.00586 |       0.00000 |       0.00161 |       0.00128 |       0.76895
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.27         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 5646         |
| TimeElapsed     | 2.24e+04     |
| TimestepsSoFar  | 14938112     |
| ev_tdlam_before | 0.603        |
| loss_ent        | 0.7689535    |
| loss_kl         | 0.001280234  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.005860218 |
| loss_vf_loss    | 0.0016135218 |
----------------------------------
********** Iteration 3647 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00165 |       0.00000 |       0.00052 

********** Iteration 3652 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00133 |       0.00000 |       0.00471 |       0.00024 |       0.75511
     -0.00141 |       0.00000 |       0.00331 |       0.00039 |       0.75530
     -0.00221 |       0.00000 |       0.00276 |       0.00042 |       0.75525
     -0.00290 |       0.00000 |       0.00257 |       0.00042 |       0.75486
     -0.00309 |       0.00000 |       0.00244 |       0.00053 |       0.75470
     -0.00350 |       0.00000 |       0.00233 |       0.00054 |       0.75475
     -0.00366 |       0.00000 |       0.00226 |       0.00059 |       0.75590
     -0.00410 |       0.00000 |       0.00218 |       0.00069 |       0.75473
     -0.00418 |       0.00000 |       0.00211 |       0.00075 |       0.75546
     -0.00402 |       0.00000 |       0.00206 |       0.00073 |       0.75566
Evaluating losses...
     -0.00487 |       0.00000 |       0.00205 |       0.00079 |     

     -0.00395 |       0.00000 |       0.00121 |       0.00104 |       0.77167
     -0.00414 |       0.00000 |       0.00120 |       0.00109 |       0.77134
     -0.00427 |       0.00000 |       0.00119 |       0.00112 |       0.77167
Evaluating losses...
     -0.00500 |       0.00000 |       0.00117 |       0.00103 |       0.77008
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.31         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 5661         |
| TimeElapsed     | 2.25e+04     |
| TimestepsSoFar  | 14983168     |
| ev_tdlam_before | 0.756        |
| loss_ent        | 0.77008396   |
| loss_kl         | 0.0010344357 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.004999848 |
| loss_vf_loss    | 0.0011749712 |
----------------------------------
********** Iteration 3658 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00079 |       0.00000 |       0.00567 

********** Iteration 3663 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00072 |       0.00000 |       0.00154 |       0.00013 |       0.76811
     -0.00095 |       0.00000 |       0.00136 |       0.00025 |       0.76735
     -0.00174 |       0.00000 |       0.00128 |       0.00033 |       0.76741
     -0.00269 |       0.00000 |       0.00124 |       0.00040 |       0.76755
     -0.00258 |       0.00000 |       0.00122 |       0.00044 |       0.76778
     -0.00287 |       0.00000 |       0.00119 |       0.00056 |       0.76717
     -0.00288 |       0.00000 |       0.00117 |       0.00053 |       0.76768
     -0.00315 |       0.00000 |       0.00116 |       0.00060 |       0.76748
     -0.00334 |       0.00000 |       0.00114 |       0.00064 |       0.76765
     -0.00354 |       0.00000 |       0.00114 |       0.00075 |       0.76708
Evaluating losses...
     -0.00398 |       0.00000 |       0.00111 |       0.00075 |     

     -0.00512 |       0.00000 |      8.97e-05 |       0.00083 |       0.78033
     -0.00529 |       0.00000 |      8.82e-05 |       0.00088 |       0.77971
     -0.00556 |       0.00000 |      8.48e-05 |       0.00091 |       0.77994
Evaluating losses...
     -0.00601 |       0.00000 |      8.40e-05 |       0.00089 |       0.77950
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.3           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5676          |
| TimeElapsed     | 2.27e+04      |
| TimestepsSoFar  | 15028224      |
| ev_tdlam_before | -0.723        |
| loss_ent        | 0.77949536    |
| loss_kl         | 0.0008914707  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0060064206 |
| loss_vf_loss    | 8.396854e-05  |
-----------------------------------
********** Iteration 3669 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 | 

********** Iteration 3674 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00037 |       0.00000 |       0.00010 |       0.00010 |       0.79164
     -0.00096 |       0.00000 |      4.77e-05 |       0.00023 |       0.79139
     -0.00152 |       0.00000 |      3.63e-05 |       0.00027 |       0.79186
     -0.00202 |       0.00000 |      3.06e-05 |       0.00037 |       0.79211
     -0.00252 |       0.00000 |      2.77e-05 |       0.00043 |       0.79132
     -0.00285 |       0.00000 |      2.54e-05 |       0.00048 |       0.79235
     -0.00317 |       0.00000 |      2.39e-05 |       0.00056 |       0.79187
     -0.00336 |       0.00000 |      2.25e-05 |       0.00061 |       0.79194
     -0.00357 |       0.00000 |      2.18e-05 |       0.00062 |       0.79166
     -0.00371 |       0.00000 |      2.11e-05 |       0.00069 |       0.79182
Evaluating losses...
     -0.00392 |       0.00000 |      2.08e-05 |       0.00072 |     

     -0.00514 |       0.00000 |       0.00013 |       0.00097 |       0.77609
     -0.00515 |       0.00000 |       0.00013 |       0.00103 |       0.77560
     -0.00471 |       0.00000 |       0.00012 |       0.00107 |       0.77585
Evaluating losses...
     -0.00608 |       0.00000 |       0.00012 |       0.00111 |       0.77469
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.38           |
| EpThisIter      | 1              |
| EpisodesSoFar   | 5691           |
| TimeElapsed     | 2.27e+04       |
| TimestepsSoFar  | 15073280       |
| ev_tdlam_before | -0.92          |
| loss_ent        | 0.77469033     |
| loss_kl         | 0.00111084     |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.0060781557  |
| loss_vf_loss    | 0.000119909586 |
------------------------------------
********** Iteration 3680 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00068 |   

********** Iteration 3685 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00080 |       0.00000 |       0.00447 |       0.00013 |       0.78497
     -0.00172 |       0.00000 |       0.00331 |       0.00030 |       0.78446
     -0.00236 |       0.00000 |       0.00283 |       0.00040 |       0.78445
     -0.00309 |       0.00000 |       0.00259 |       0.00048 |       0.78519
     -0.00367 |       0.00000 |       0.00242 |       0.00056 |       0.78531
     -0.00398 |       0.00000 |       0.00235 |       0.00062 |       0.78531
     -0.00436 |       0.00000 |       0.00227 |       0.00069 |       0.78545
     -0.00453 |       0.00000 |       0.00221 |       0.00070 |       0.78502
     -0.00460 |       0.00000 |       0.00217 |       0.00073 |       0.78559
     -0.00481 |       0.00000 |       0.00215 |       0.00080 |       0.78537
Evaluating losses...
     -0.00528 |       0.00000 |       0.00208 |       0.00075 |     

     -0.00685 |       0.00000 |      9.87e-05 |       0.00081 |       0.78903
     -0.00698 |       0.00000 |      9.34e-05 |       0.00087 |       0.78912
     -0.00710 |       0.00000 |      9.05e-05 |       0.00091 |       0.78843
Evaluating losses...
     -0.00786 |       0.00000 |      8.71e-05 |       0.00098 |       0.78757
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.4           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5706          |
| TimeElapsed     | 2.28e+04      |
| TimestepsSoFar  | 15118336      |
| ev_tdlam_before | 0.0168        |
| loss_ent        | 0.7875695     |
| loss_kl         | 0.00097517174 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.007859216  |
| loss_vf_loss    | 8.711315e-05  |
-----------------------------------
********** Iteration 3691 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.75e-05 |       0.00000 | 

********** Iteration 3696 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     7.85e-05 |       0.00000 |       0.00234 |       0.00023 |       0.76721
     -0.00276 |       0.00000 |       0.00196 |       0.00048 |       0.76623
     -0.00288 |       0.00000 |       0.00185 |       0.00045 |       0.76666
     -0.00377 |       0.00000 |       0.00177 |       0.00052 |       0.76679
     -0.00421 |       0.00000 |       0.00172 |       0.00058 |       0.76666
     -0.00366 |       0.00000 |       0.00167 |       0.00066 |       0.76728
     -0.00445 |       0.00000 |       0.00164 |       0.00074 |       0.76624
     -0.00455 |       0.00000 |       0.00161 |       0.00075 |       0.76677
     -0.00473 |       0.00000 |       0.00157 |       0.00080 |       0.76732
     -0.00448 |       0.00000 |       0.00154 |       0.00092 |       0.76712
Evaluating losses...
     -0.00513 |       0.00000 |       0.00151 |       0.00089 |     

     -0.00324 |       0.00000 |       0.00163 |       0.00064 |       0.74593
     -0.00358 |       0.00000 |       0.00157 |       0.00077 |       0.74646
     -0.00411 |       0.00000 |       0.00151 |       0.00069 |       0.74632
Evaluating losses...
     -0.00432 |       0.00000 |       0.00146 |       0.00072 |       0.74677
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.43         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 5721         |
| TimeElapsed     | 2.28e+04     |
| TimestepsSoFar  | 15163392     |
| ev_tdlam_before | 0.517        |
| loss_ent        | 0.7467683    |
| loss_kl         | 0.000720484  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.004322741 |
| loss_vf_loss    | 0.0014599838 |
----------------------------------
********** Iteration 3702 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00012 |       0.00000 |       0.00193 

********** Iteration 3707 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00090 |       0.00000 |       0.00094 |       0.00070 |       0.76743
      0.00061 |       0.00000 |       0.00079 |       0.00077 |       0.76443
     -0.00069 |       0.00000 |       0.00074 |       0.00071 |       0.76772
     -0.00200 |       0.00000 |       0.00072 |       0.00067 |       0.76631
     -0.00267 |       0.00000 |       0.00070 |       0.00074 |       0.76799
     -0.00265 |       0.00000 |       0.00068 |       0.00077 |       0.76616
     -0.00223 |       0.00000 |       0.00066 |       0.00109 |       0.76506
     -0.00316 |       0.00000 |       0.00064 |       0.00092 |       0.76617
     -0.00356 |       0.00000 |       0.00063 |       0.00095 |       0.76669
     -0.00342 |       0.00000 |       0.00060 |       0.00094 |       0.76516
Evaluating losses...
     -0.00378 |       0.00000 |       0.00058 |       0.00088 |     

     -0.00464 |       0.00000 |       0.00140 |       0.00075 |       0.80472
     -0.00474 |       0.00000 |       0.00138 |       0.00076 |       0.80484
     -0.00486 |       0.00000 |       0.00137 |       0.00082 |       0.80449
Evaluating losses...
     -0.00526 |       0.00000 |       0.00135 |       0.00084 |       0.80412
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.38          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5736          |
| TimeElapsed     | 2.29e+04      |
| TimestepsSoFar  | 15208448      |
| ev_tdlam_before | 0.46          |
| loss_ent        | 0.80411583    |
| loss_kl         | 0.00083534623 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.005256657  |
| loss_vf_loss    | 0.0013518723  |
-----------------------------------
********** Iteration 3713 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |       0.00000 | 

********** Iteration 3718 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00177 |       0.00000 |       0.00318 |       0.00019 |       0.80908
     -0.00113 |       0.00000 |       0.00280 |       0.00041 |       0.80783
     -0.00322 |       0.00000 |       0.00265 |       0.00046 |       0.80762
     -0.00353 |       0.00000 |       0.00255 |       0.00056 |       0.80777
     -0.00486 |       0.00000 |       0.00250 |       0.00060 |       0.80806
     -0.00466 |       0.00000 |       0.00246 |       0.00075 |       0.80724
     -0.00530 |       0.00000 |       0.00241 |       0.00079 |       0.80756
     -0.00514 |       0.00000 |       0.00238 |       0.00095 |       0.80619
     -0.00577 |       0.00000 |       0.00235 |       0.00091 |       0.80740
     -0.00611 |       0.00000 |       0.00234 |       0.00099 |       0.80665
Evaluating losses...
     -0.00689 |       0.00000 |       0.00229 |       0.00099 |     

     -0.00454 |       0.00000 |       0.00121 |       0.00083 |       0.78586
     -0.00475 |       0.00000 |       0.00120 |       0.00081 |       0.78592
     -0.00494 |       0.00000 |       0.00117 |       0.00087 |       0.78652
     -0.00498 |       0.00000 |       0.00115 |       0.00094 |       0.78614
Evaluating losses...
     -0.00531 |       0.00000 |       0.00111 |       0.00099 |       0.78661
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.44          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5751          |
| TimeElapsed     | 2.31e+04      |
| TimestepsSoFar  | 15253504      |
| ev_tdlam_before | 0.518         |
| loss_ent        | 0.7866101     |
| loss_kl         | 0.0009930962  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0053130286 |
| loss_vf_loss    | 0.0011102809  |
-----------------------------------
********** Iteration 3724 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3729 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     7.81e-05 |       0.00000 |       0.00077 |      9.37e-05 |       0.77812
     -0.00105 |       0.00000 |       0.00052 |       0.00016 |       0.77856
     -0.00142 |       0.00000 |       0.00051 |       0.00020 |       0.77861
     -0.00155 |       0.00000 |       0.00051 |       0.00026 |       0.77912
     -0.00177 |       0.00000 |       0.00050 |       0.00031 |       0.77831
     -0.00203 |       0.00000 |       0.00050 |       0.00034 |       0.77786
     -0.00193 |       0.00000 |       0.00048 |       0.00041 |       0.77770
     -0.00228 |       0.00000 |       0.00049 |       0.00042 |       0.77813
     -0.00227 |       0.00000 |       0.00049 |       0.00044 |       0.77728
     -0.00243 |       0.00000 |       0.00048 |       0.00047 |       0.77768
Evaluating losses...
     -0.00268 |       0.00000 |       0.00047 |       0.00047 |     

     -0.00389 |       0.00000 |       0.00059 |       0.00085 |       0.77770
     -0.00385 |       0.00000 |       0.00056 |       0.00091 |       0.77836
     -0.00382 |       0.00000 |       0.00054 |       0.00092 |       0.77790
Evaluating losses...
     -0.00412 |       0.00000 |       0.00051 |       0.00096 |       0.77811
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.4          |
| EpThisIter      | 2            |
| EpisodesSoFar   | 5766         |
| TimeElapsed     | 2.31e+04     |
| TimestepsSoFar  | 15298560     |
| ev_tdlam_before | 0.561        |
| loss_ent        | 0.77811      |
| loss_kl         | 0.0009556278 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.004122059 |
| loss_vf_loss    | 0.0005119204 |
----------------------------------
********** Iteration 3735 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00048 |       0.00000 |      5.41e-05 

********** Iteration 3740 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00054 |       0.00000 |       0.00166 |       0.00024 |       0.79430
     -0.00276 |       0.00000 |       0.00116 |       0.00046 |       0.79328
     -0.00390 |       0.00000 |       0.00106 |       0.00053 |       0.79400
     -0.00440 |       0.00000 |       0.00100 |       0.00058 |       0.79433
     -0.00503 |       0.00000 |       0.00095 |       0.00065 |       0.79407
     -0.00534 |       0.00000 |       0.00089 |       0.00070 |       0.79429
     -0.00543 |       0.00000 |       0.00086 |       0.00076 |       0.79413
     -0.00561 |       0.00000 |       0.00083 |       0.00082 |       0.79438
     -0.00583 |       0.00000 |       0.00081 |       0.00086 |       0.79414
     -0.00592 |       0.00000 |       0.00080 |       0.00086 |       0.79431
Evaluating losses...
     -0.00619 |       0.00000 |       0.00076 |       0.00091 |     

     -0.00444 |       0.00000 |      2.39e-05 |       0.00072 |       0.77379
     -0.00466 |       0.00000 |      2.27e-05 |       0.00072 |       0.77217
     -0.00473 |       0.00000 |      2.19e-05 |       0.00078 |       0.77343
Evaluating losses...
     -0.00527 |       0.00000 |      2.09e-05 |       0.00082 |       0.77233
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.41         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 5781         |
| TimeElapsed     | 2.32e+04     |
| TimestepsSoFar  | 15343616     |
| ev_tdlam_before | -2.4         |
| loss_ent        | 0.77233267   |
| loss_kl         | 0.0008166309 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.005274277 |
| loss_vf_loss    | 2.0943e-05   |
----------------------------------
********** Iteration 3746 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00049 |       0.00000 |       0.00337 

********** Iteration 3751 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 |       0.00166 |      8.15e-05 |       0.77988
     -0.00130 |       0.00000 |       0.00132 |       0.00014 |       0.77980
     -0.00193 |       0.00000 |       0.00098 |       0.00024 |       0.78122
     -0.00248 |       0.00000 |       0.00074 |       0.00033 |       0.78107
     -0.00275 |       0.00000 |       0.00060 |       0.00035 |       0.78104
     -0.00303 |       0.00000 |       0.00053 |       0.00040 |       0.78115
     -0.00321 |       0.00000 |       0.00048 |       0.00044 |       0.78133
     -0.00336 |       0.00000 |       0.00045 |       0.00047 |       0.78147
     -0.00349 |       0.00000 |       0.00043 |       0.00049 |       0.78174
     -0.00352 |       0.00000 |       0.00042 |       0.00053 |       0.78148
Evaluating losses...
     -0.00377 |       0.00000 |       0.00040 |       0.00052 |     

     -0.00325 |       0.00000 |       0.00062 |       0.00068 |       0.80059
     -0.00290 |       0.00000 |       0.00059 |       0.00073 |       0.80083
     -0.00313 |       0.00000 |       0.00057 |       0.00078 |       0.80061
Evaluating losses...
     -0.00376 |       0.00000 |       0.00055 |       0.00077 |       0.80061
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.47          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5796          |
| TimeElapsed     | 2.32e+04      |
| TimestepsSoFar  | 15388672      |
| ev_tdlam_before | 0.639         |
| loss_ent        | 0.8006105     |
| loss_kl         | 0.000766893   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0037555154 |
| loss_vf_loss    | 0.0005544536  |
-----------------------------------
********** Iteration 3757 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     1.63e-05 |       0.00000 | 

********** Iteration 3762 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -9.35e-05 |       0.00000 |       0.00155 |       0.00019 |       0.74496
     -0.00171 |       0.00000 |       0.00118 |       0.00039 |       0.74394
     -0.00193 |       0.00000 |       0.00108 |       0.00035 |       0.74452
     -0.00226 |       0.00000 |       0.00103 |       0.00032 |       0.74420
     -0.00205 |       0.00000 |       0.00100 |       0.00050 |       0.74269
     -0.00232 |       0.00000 |       0.00097 |       0.00043 |       0.74363
     -0.00257 |       0.00000 |       0.00095 |       0.00047 |       0.74318
     -0.00249 |       0.00000 |       0.00093 |       0.00051 |       0.74388
     -0.00276 |       0.00000 |       0.00092 |       0.00052 |       0.74358
     -0.00284 |       0.00000 |       0.00091 |       0.00052 |       0.74397
Evaluating losses...
     -0.00310 |       0.00000 |       0.00091 |       0.00052 |     

     -0.00432 |       0.00000 |       0.00084 |       0.00088 |       0.76522
     -0.00472 |       0.00000 |       0.00084 |       0.00084 |       0.76543
     -0.00489 |       0.00000 |       0.00083 |       0.00088 |       0.76552
Evaluating losses...
     -0.00516 |       0.00000 |       0.00081 |       0.00080 |       0.76547
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.39         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 5811         |
| TimeElapsed     | 2.32e+04     |
| TimestepsSoFar  | 15433728     |
| ev_tdlam_before | 0.329        |
| loss_ent        | 0.76547015   |
| loss_kl         | 0.0008023567 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.005161462 |
| loss_vf_loss    | 0.0008121489 |
----------------------------------
********** Iteration 3768 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00058 |       0.00000 |       0.00014 

********** Iteration 3773 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00025 |       0.00000 |       0.00323 |       0.00021 |       0.77525
     -0.00209 |       0.00000 |       0.00259 |       0.00032 |       0.77600
     -0.00254 |       0.00000 |       0.00231 |       0.00039 |       0.77640
     -0.00306 |       0.00000 |       0.00212 |       0.00046 |       0.77504
     -0.00311 |       0.00000 |       0.00197 |       0.00042 |       0.77608
     -0.00336 |       0.00000 |       0.00185 |       0.00056 |       0.77534
     -0.00376 |       0.00000 |       0.00179 |       0.00053 |       0.77598
     -0.00389 |       0.00000 |       0.00171 |       0.00058 |       0.77484
     -0.00380 |       0.00000 |       0.00165 |       0.00073 |       0.77538
     -0.00404 |       0.00000 |       0.00158 |       0.00068 |       0.77615
Evaluating losses...
     -0.00451 |       0.00000 |       0.00154 |       0.00073 |     

     -0.00560 |       0.00000 |      1.42e-05 |       0.00083 |       0.80128
     -0.00582 |       0.00000 |      1.36e-05 |       0.00091 |       0.80088
     -0.00595 |       0.00000 |      1.31e-05 |       0.00099 |       0.80077
Evaluating losses...
     -0.00661 |       0.00000 |      1.29e-05 |       0.00115 |       0.80140
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.37         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 5826         |
| TimeElapsed     | 2.33e+04     |
| TimestepsSoFar  | 15478784     |
| ev_tdlam_before | -1.11        |
| loss_ent        | 0.8013984    |
| loss_kl         | 0.0011536883 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.006613725 |
| loss_vf_loss    | 1.291308e-05 |
----------------------------------
********** Iteration 3779 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00184 |       0.00000 |       0.00187 

********** Iteration 3784 ************
Eval num_timesteps=15499264, episode_reward=-0.07 +/- 1.00
Episode length: 3000.00 +/- 0.00
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00223 |       0.00000 |       0.00555 |       0.00105 |       0.77155
     -0.00167 |       0.00000 |       0.00434 |       0.00050 |       0.77122
     -0.00317 |       0.00000 |       0.00344 |       0.00050 |       0.77084
     -0.00320 |       0.00000 |       0.00288 |       0.00074 |       0.77109
     -0.00404 |       0.00000 |       0.00270 |       0.00070 |       0.77167
     -0.00382 |       0.00000 |       0.00261 |       0.00069 |       0.77167
     -0.00421 |       0.00000 |       0.00254 |       0.00073 |       0.77202
     -0.00460 |       0.00000 |       0.00247 |       0.00087 |       0.77107
     -0.00472 |       0.00000 |       0.00244 |       0.00094 |       0.77111
     -0.00472 |       0.00000 |       0.00240 |       0.00084 |       0.771

     -0.00325 |       0.00000 |       0.00326 |       0.00073 |       0.75229
     -0.00346 |       0.00000 |       0.00324 |       0.00077 |       0.75267
     -0.00364 |       0.00000 |       0.00323 |       0.00079 |       0.75248
     -0.00372 |       0.00000 |       0.00320 |       0.00082 |       0.75230
Evaluating losses...
     -0.00404 |       0.00000 |       0.00316 |       0.00078 |       0.75246
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.32          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5840          |
| TimeElapsed     | 2.35e+04      |
| TimestepsSoFar  | 15523840      |
| ev_tdlam_before | 0.535         |
| loss_ent        | 0.752464      |
| loss_kl         | 0.0007776551  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0040352554 |
| loss_vf_loss    | 0.0031636686  |
-----------------------------------
********** Iteration 3790 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3795 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     3.84e-05 |       0.00000 |       0.00260 |       0.00030 |       0.76397
     -0.00202 |       0.00000 |       0.00217 |       0.00038 |       0.76400
     -0.00261 |       0.00000 |       0.00199 |       0.00041 |       0.76367
     -0.00284 |       0.00000 |       0.00194 |       0.00047 |       0.76438
     -0.00303 |       0.00000 |       0.00188 |       0.00054 |       0.76348
     -0.00319 |       0.00000 |       0.00184 |       0.00064 |       0.76284
     -0.00348 |       0.00000 |       0.00181 |       0.00066 |       0.76340
     -0.00364 |       0.00000 |       0.00179 |       0.00067 |       0.76360
     -0.00375 |       0.00000 |       0.00177 |       0.00076 |       0.76378
     -0.00385 |       0.00000 |       0.00175 |       0.00080 |       0.76424
Evaluating losses...
     -0.00414 |       0.00000 |       0.00173 |       0.00080 |     

     -0.00304 |       0.00000 |       0.00084 |       0.00045 |       0.76513
     -0.00326 |       0.00000 |       0.00081 |       0.00046 |       0.76563
     -0.00369 |       0.00000 |       0.00080 |       0.00045 |       0.76464
Evaluating losses...
     -0.00401 |       0.00000 |       0.00077 |       0.00047 |       0.76550
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.27          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5855          |
| TimeElapsed     | 2.35e+04      |
| TimestepsSoFar  | 15568896      |
| ev_tdlam_before | 0.383         |
| loss_ent        | 0.76549816    |
| loss_kl         | 0.00047250465 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0040120576 |
| loss_vf_loss    | 0.0007660015  |
-----------------------------------
********** Iteration 3801 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00042 |       0.00000 | 

********** Iteration 3806 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -2.71e-05 |       0.00000 |       0.00474 |       0.00016 |       0.77730
     -0.00201 |       0.00000 |       0.00336 |       0.00032 |       0.77558
     -0.00270 |       0.00000 |       0.00297 |       0.00035 |       0.77600
     -0.00303 |       0.00000 |       0.00275 |       0.00041 |       0.77630
     -0.00369 |       0.00000 |       0.00261 |       0.00044 |       0.77520
     -0.00393 |       0.00000 |       0.00245 |       0.00049 |       0.77545
     -0.00396 |       0.00000 |       0.00228 |       0.00054 |       0.77511
     -0.00419 |       0.00000 |       0.00216 |       0.00063 |       0.77557
     -0.00455 |       0.00000 |       0.00206 |       0.00066 |       0.77514
     -0.00442 |       0.00000 |       0.00198 |       0.00072 |       0.77515
Evaluating losses...
     -0.00497 |       0.00000 |       0.00190 |       0.00074 |     

     -0.00485 |       0.00000 |      2.29e-05 |       0.00071 |       0.79529
     -0.00534 |       0.00000 |      2.17e-05 |       0.00083 |       0.79534
     -0.00535 |       0.00000 |      2.13e-05 |       0.00091 |       0.79520
Evaluating losses...
     -0.00605 |       0.00000 |      2.04e-05 |       0.00111 |       0.79556
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.26          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5870          |
| TimeElapsed     | 2.35e+04      |
| TimestepsSoFar  | 15613952      |
| ev_tdlam_before | -1.24         |
| loss_ent        | 0.79555845    |
| loss_kl         | 0.0011054787  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0060466155 |
| loss_vf_loss    | 2.0374177e-05 |
-----------------------------------
********** Iteration 3812 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00025 |       0.00000 | 

********** Iteration 3817 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00027 |       0.00000 |       0.00431 |       0.00048 |       0.76847
     -0.00194 |       0.00000 |       0.00400 |       0.00061 |       0.76715
     -0.00242 |       0.00000 |       0.00383 |       0.00038 |       0.76926
     -0.00293 |       0.00000 |       0.00371 |       0.00057 |       0.76786
     -0.00324 |       0.00000 |       0.00361 |       0.00048 |       0.76819
     -0.00372 |       0.00000 |       0.00351 |       0.00052 |       0.76848
     -0.00360 |       0.00000 |       0.00338 |       0.00072 |       0.76916
     -0.00403 |       0.00000 |       0.00328 |       0.00099 |       0.76846
     -0.00407 |       0.00000 |       0.00316 |       0.00087 |       0.76796
     -0.00443 |       0.00000 |       0.00308 |       0.00087 |       0.76795
Evaluating losses...
     -0.00470 |       0.00000 |       0.00301 |       0.00093 |     

     -0.00531 |       0.00000 |      5.60e-05 |       0.00054 |       0.77290
     -0.00569 |       0.00000 |      5.42e-05 |       0.00055 |       0.77238
     -0.00600 |       0.00000 |      5.25e-05 |       0.00056 |       0.77289
Evaluating losses...
     -0.00684 |       0.00000 |      5.12e-05 |       0.00061 |       0.77252
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.2           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5886          |
| TimeElapsed     | 2.36e+04      |
| TimestepsSoFar  | 15659008      |
| ev_tdlam_before | -0.172        |
| loss_ent        | 0.7725244     |
| loss_kl         | 0.00061197486 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0068350034 |
| loss_vf_loss    | 5.12304e-05   |
-----------------------------------
********** Iteration 3823 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00037 |       0.00000 | 

********** Iteration 3828 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00054 |       0.00000 |       0.00122 |       0.00084 |       0.78282
     -0.00078 |       0.00000 |       0.00055 |       0.00032 |       0.78196
     -0.00123 |       0.00000 |       0.00050 |       0.00033 |       0.78149
     -0.00173 |       0.00000 |       0.00046 |       0.00028 |       0.78183
     -0.00194 |       0.00000 |       0.00044 |       0.00035 |       0.78206
     -0.00195 |       0.00000 |       0.00042 |       0.00038 |       0.78163
     -0.00229 |       0.00000 |       0.00042 |       0.00040 |       0.78180
     -0.00226 |       0.00000 |       0.00040 |       0.00044 |       0.78206
     -0.00244 |       0.00000 |       0.00040 |       0.00046 |       0.78224
     -0.00255 |       0.00000 |       0.00039 |       0.00047 |       0.78226
Evaluating losses...
     -0.00275 |       0.00000 |       0.00038 |       0.00044 |     

     -0.00226 |       0.00000 |       0.00083 |       0.00045 |       0.80875
     -0.00224 |       0.00000 |       0.00082 |       0.00046 |       0.80904
     -0.00233 |       0.00000 |       0.00081 |       0.00057 |       0.80901
     -0.00264 |       0.00000 |       0.00081 |       0.00057 |       0.80931
Evaluating losses...
     -0.00300 |       0.00000 |       0.00078 |       0.00060 |       0.80932
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.13          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5901          |
| TimeElapsed     | 2.36e+04      |
| TimestepsSoFar  | 15704064      |
| ev_tdlam_before | 0.533         |
| loss_ent        | 0.809325      |
| loss_kl         | 0.00059669884 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0030024354 |
| loss_vf_loss    | 0.0007800239  |
-----------------------------------
********** Iteration 3834 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3839 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00022 |       0.00000 |      7.83e-05 |       0.00080 |       0.75380
     -0.00135 |       0.00000 |      5.32e-05 |       0.00080 |       0.75298
     -0.00224 |       0.00000 |      4.15e-05 |       0.00052 |       0.75357
     -0.00259 |       0.00000 |      3.51e-05 |       0.00051 |       0.75447
     -0.00290 |       0.00000 |      3.08e-05 |       0.00051 |       0.75460
     -0.00311 |       0.00000 |      2.76e-05 |       0.00052 |       0.75468
     -0.00333 |       0.00000 |      2.53e-05 |       0.00054 |       0.75432
     -0.00351 |       0.00000 |      2.34e-05 |       0.00058 |       0.75462
     -0.00357 |       0.00000 |      2.19e-05 |       0.00065 |       0.75455
     -0.00374 |       0.00000 |      2.06e-05 |       0.00065 |       0.75484
Evaluating losses...
     -0.00404 |       0.00000 |      1.95e-05 |       0.00068 |     

     -0.00351 |       0.00000 |       0.00204 |       0.00066 |       0.77612
     -0.00357 |       0.00000 |       0.00203 |       0.00067 |       0.77560
     -0.00369 |       0.00000 |       0.00200 |       0.00063 |       0.77547
     -0.00375 |       0.00000 |       0.00201 |       0.00061 |       0.77499
Evaluating losses...
     -0.00385 |       0.00000 |       0.00197 |       0.00058 |       0.77542
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.12          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 5916          |
| TimeElapsed     | 2.36e+04      |
| TimestepsSoFar  | 15749120      |
| ev_tdlam_before | 0.663         |
| loss_ent        | 0.7754151     |
| loss_kl         | 0.00058307336 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0038462654 |
| loss_vf_loss    | 0.0019690192  |
-----------------------------------
********** Iteration 3845 ************
Eval num_timesteps=15749120, episode_reward=0.

********** Iteration 3850 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.85e-05 |       0.00000 |       0.00039 |       0.00018 |       0.77959
     -0.00148 |       0.00000 |       0.00023 |       0.00040 |       0.77977
     -0.00235 |       0.00000 |       0.00019 |       0.00042 |       0.78063
     -0.00284 |       0.00000 |       0.00016 |       0.00043 |       0.77998
     -0.00322 |       0.00000 |       0.00015 |       0.00055 |       0.78035
     -0.00348 |       0.00000 |       0.00014 |       0.00059 |       0.78026
     -0.00378 |       0.00000 |       0.00013 |       0.00065 |       0.78036
     -0.00396 |       0.00000 |       0.00013 |       0.00069 |       0.78039
     -0.00414 |       0.00000 |       0.00012 |       0.00072 |       0.78061
     -0.00433 |       0.00000 |       0.00012 |       0.00075 |       0.78049
Evaluating losses...
     -0.00465 |       0.00000 |       0.00012 |       0.00082 |     

     -0.00380 |       0.00000 |       0.00199 |       0.00071 |       0.76297
     -0.00396 |       0.00000 |       0.00197 |       0.00076 |       0.76342
     -0.00409 |       0.00000 |       0.00194 |       0.00077 |       0.76352
Evaluating losses...
     -0.00432 |       0.00000 |       0.00189 |       0.00076 |       0.76386
----------------------------------
| EpLenMean       | 3.02e+03     |
| EpRewMean       | 0.18         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 5930         |
| TimeElapsed     | 2.38e+04     |
| TimestepsSoFar  | 15794176     |
| ev_tdlam_before | 0.599        |
| loss_ent        | 0.76385635   |
| loss_kl         | 0.0007617168 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.004318555 |
| loss_vf_loss    | 0.0018925412 |
----------------------------------
********** Iteration 3856 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00024 |       0.00000 |      7.74e-05 

********** Iteration 3861 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00018 |       0.00000 |       0.00025 |       0.00021 |       0.78837
     -0.00207 |       0.00000 |       0.00012 |       0.00030 |       0.78760
     -0.00262 |       0.00000 |      9.14e-05 |       0.00043 |       0.78777
     -0.00294 |       0.00000 |      7.96e-05 |       0.00050 |       0.78846
     -0.00338 |       0.00000 |      7.21e-05 |       0.00054 |       0.78814
     -0.00359 |       0.00000 |      6.71e-05 |       0.00057 |       0.78853
     -0.00376 |       0.00000 |      6.37e-05 |       0.00061 |       0.78852
     -0.00385 |       0.00000 |      6.11e-05 |       0.00062 |       0.78838
     -0.00389 |       0.00000 |      5.88e-05 |       0.00066 |       0.78859
     -0.00402 |       0.00000 |      5.81e-05 |       0.00070 |       0.78833
Evaluating losses...
     -0.00432 |       0.00000 |      5.60e-05 |       0.00070 |     

     -0.00457 |       0.00000 |      2.36e-05 |       0.00049 |       0.75386
     -0.00489 |       0.00000 |      2.31e-05 |       0.00048 |       0.75361
     -0.00497 |       0.00000 |      2.20e-05 |       0.00056 |       0.75389
Evaluating losses...
     -0.00551 |       0.00000 |      2.15e-05 |       0.00064 |       0.75398
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.18          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5945          |
| TimeElapsed     | 2.38e+04      |
| TimestepsSoFar  | 15839232      |
| ev_tdlam_before | -0.597        |
| loss_ent        | 0.7539829     |
| loss_kl         | 0.0006384202  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.005513339  |
| loss_vf_loss    | 2.1480375e-05 |
-----------------------------------
********** Iteration 3867 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00018 |       0.00000 | 

********** Iteration 3872 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00028 |       0.00000 |       0.00128 |      9.36e-05 |       0.76618
     -0.00123 |       0.00000 |       0.00105 |       0.00028 |       0.76662
     -0.00186 |       0.00000 |       0.00096 |       0.00027 |       0.76484
     -0.00194 |       0.00000 |       0.00091 |       0.00031 |       0.76493
     -0.00241 |       0.00000 |       0.00087 |       0.00040 |       0.76436
     -0.00254 |       0.00000 |       0.00085 |       0.00043 |       0.76352
     -0.00270 |       0.00000 |       0.00083 |       0.00050 |       0.76422
     -0.00282 |       0.00000 |       0.00081 |       0.00048 |       0.76439
     -0.00293 |       0.00000 |       0.00079 |       0.00058 |       0.76411
     -0.00302 |       0.00000 |       0.00078 |       0.00058 |       0.76372
Evaluating losses...
     -0.00337 |       0.00000 |       0.00077 |       0.00063 |     

     -0.00459 |       0.00000 |      1.13e-05 |       0.00060 |       0.74114
     -0.00488 |       0.00000 |      1.08e-05 |       0.00062 |       0.74074
     -0.00513 |       0.00000 |      1.05e-05 |       0.00067 |       0.74038
Evaluating losses...
     -0.00544 |       0.00000 |      1.03e-05 |       0.00072 |       0.74128
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.18           |
| EpThisIter      | 1              |
| EpisodesSoFar   | 5960           |
| TimeElapsed     | 2.39e+04       |
| TimestepsSoFar  | 15884288       |
| ev_tdlam_before | -2.12          |
| loss_ent        | 0.7412781      |
| loss_kl         | 0.0007206568   |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.0054428037  |
| loss_vf_loss    | 1.02945805e-05 |
------------------------------------
********** Iteration 3878 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00031 |   

********** Iteration 3883 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     4.74e-05 |       0.00000 |       0.00205 |      8.33e-05 |       0.74824
     -0.00116 |       0.00000 |       0.00130 |       0.00025 |       0.74723
     -0.00169 |       0.00000 |       0.00108 |       0.00025 |       0.74800
     -0.00209 |       0.00000 |       0.00101 |       0.00030 |       0.74843
     -0.00228 |       0.00000 |       0.00098 |       0.00037 |       0.74869
     -0.00245 |       0.00000 |       0.00095 |       0.00037 |       0.74863
     -0.00257 |       0.00000 |       0.00093 |       0.00041 |       0.74891
     -0.00275 |       0.00000 |       0.00091 |       0.00049 |       0.74883
     -0.00267 |       0.00000 |       0.00089 |       0.00054 |       0.74854
     -0.00319 |       0.00000 |       0.00089 |       0.00054 |       0.74920
Evaluating losses...
     -0.00314 |       0.00000 |       0.00086 |       0.00051 |     

     -0.00332 |       0.00000 |       0.00080 |       0.00041 |       0.78794
     -0.00340 |       0.00000 |       0.00079 |       0.00045 |       0.78823
     -0.00349 |       0.00000 |       0.00077 |       0.00047 |       0.78870
Evaluating losses...
     -0.00408 |       0.00000 |       0.00074 |       0.00045 |       0.78864
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.26          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5975          |
| TimeElapsed     | 2.39e+04      |
| TimestepsSoFar  | 15929344      |
| ev_tdlam_before | 0.465         |
| loss_ent        | 0.7886353     |
| loss_kl         | 0.00044905258 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0040787663 |
| loss_vf_loss    | 0.00074459094 |
-----------------------------------
********** Iteration 3889 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00035 |       0.00000 | 

********** Iteration 3894 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00072 |       0.00000 |       0.00297 |       0.00035 |       0.73948
     -0.00070 |       0.00000 |       0.00272 |       0.00135 |       0.74214
     -0.00233 |       0.00000 |       0.00260 |       0.00054 |       0.74006
     -0.00091 |       0.00000 |       0.00250 |       0.00085 |       0.74017
     -0.00287 |       0.00000 |       0.00244 |       0.00062 |       0.74100
     -0.00318 |       0.00000 |       0.00239 |       0.00050 |       0.74083
     -0.00348 |       0.00000 |       0.00234 |       0.00050 |       0.74139
     -0.00362 |       0.00000 |       0.00230 |       0.00050 |       0.74107
     -0.00364 |       0.00000 |       0.00227 |       0.00055 |       0.74138
     -0.00366 |       0.00000 |       0.00224 |       0.00061 |       0.74149
Evaluating losses...
     -0.00356 |       0.00000 |       0.00223 |       0.00074 |     

     -0.00458 |       0.00000 |      8.94e-05 |       0.00061 |       0.76870
     -0.00484 |       0.00000 |      8.65e-05 |       0.00060 |       0.76886
     -0.00489 |       0.00000 |      8.35e-05 |       0.00064 |       0.76955
Evaluating losses...
     -0.00537 |       0.00000 |      8.28e-05 |       0.00061 |       0.76922
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.27          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 5990          |
| TimeElapsed     | 2.4e+04       |
| TimestepsSoFar  | 15974400      |
| ev_tdlam_before | -0.985        |
| loss_ent        | 0.7692172     |
| loss_kl         | 0.00060878944 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0053660255 |
| loss_vf_loss    | 8.278906e-05  |
-----------------------------------
********** Iteration 3900 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00028 |       0.00000 | 

********** Iteration 3905 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00069 |       0.00000 |       0.00289 |       0.00083 |       0.76363
     -0.00130 |       0.00000 |       0.00223 |       0.00040 |       0.76381
     -0.00183 |       0.00000 |       0.00212 |       0.00043 |       0.76311
     -0.00208 |       0.00000 |       0.00204 |       0.00056 |       0.76406
     -0.00236 |       0.00000 |       0.00199 |       0.00067 |       0.76382
     -0.00244 |       0.00000 |       0.00194 |       0.00074 |       0.76360
     -0.00276 |       0.00000 |       0.00190 |       0.00082 |       0.76277
     -0.00287 |       0.00000 |       0.00186 |       0.00097 |       0.76232
     -0.00282 |       0.00000 |       0.00183 |       0.00106 |       0.76242
     -0.00291 |       0.00000 |       0.00181 |       0.00106 |       0.76289
Evaluating losses...
     -0.00306 |       0.00000 |       0.00178 |       0.00119 |     

     -0.00298 |       0.00000 |       0.00255 |       0.00059 |       0.76171
     -0.00321 |       0.00000 |       0.00242 |       0.00058 |       0.76205
     -0.00324 |       0.00000 |       0.00232 |       0.00058 |       0.76258
     -0.00327 |       0.00000 |       0.00224 |       0.00072 |       0.76263
Evaluating losses...
     -0.00361 |       0.00000 |       0.00216 |       0.00066 |       0.76233
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.35          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6005          |
| TimeElapsed     | 2.42e+04      |
| TimestepsSoFar  | 16019456      |
| ev_tdlam_before | 0.109         |
| loss_ent        | 0.76232874    |
| loss_kl         | 0.0006564436  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0036090952 |
| loss_vf_loss    | 0.0021556637  |
-----------------------------------
********** Iteration 3911 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3916 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00023 |       0.00000 |       0.00405 |       0.00014 |       0.77906
     -0.00186 |       0.00000 |       0.00354 |       0.00034 |       0.77798
     -0.00236 |       0.00000 |       0.00323 |       0.00046 |       0.77899
     -0.00274 |       0.00000 |       0.00305 |       0.00040 |       0.77854
     -0.00317 |       0.00000 |       0.00293 |       0.00044 |       0.77884
     -0.00327 |       0.00000 |       0.00282 |       0.00049 |       0.77857
     -0.00333 |       0.00000 |       0.00276 |       0.00048 |       0.77860
     -0.00388 |       0.00000 |       0.00268 |       0.00056 |       0.77896
     -0.00377 |       0.00000 |       0.00264 |       0.00058 |       0.77888
     -0.00432 |       0.00000 |       0.00257 |       0.00060 |       0.77865
Evaluating losses...
     -0.00451 |       0.00000 |       0.00251 |       0.00062 |     

     -0.00304 |       0.00000 |       0.00161 |       0.00042 |       0.78750
     -0.00329 |       0.00000 |       0.00158 |       0.00043 |       0.78788
     -0.00345 |       0.00000 |       0.00157 |       0.00047 |       0.78783
Evaluating losses...
     -0.00366 |       0.00000 |       0.00150 |       0.00050 |       0.78768
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.36          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6020          |
| TimeElapsed     | 2.42e+04      |
| TimestepsSoFar  | 16064512      |
| ev_tdlam_before | 0.448         |
| loss_ent        | 0.7876828     |
| loss_kl         | 0.0004994861  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0036647478 |
| loss_vf_loss    | 0.0014988257  |
-----------------------------------
********** Iteration 3922 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00021 |       0.00000 | 

********** Iteration 3927 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00063 |       0.00000 |       0.00131 |      7.51e-05 |       0.74086
     -0.00089 |       0.00000 |       0.00105 |      9.72e-05 |       0.74070
     -0.00136 |       0.00000 |       0.00095 |       0.00012 |       0.73988
     -0.00139 |       0.00000 |       0.00091 |       0.00015 |       0.73956
     -0.00163 |       0.00000 |       0.00086 |       0.00019 |       0.73976
     -0.00192 |       0.00000 |       0.00083 |       0.00023 |       0.74000
     -0.00204 |       0.00000 |       0.00081 |       0.00027 |       0.73965
     -0.00221 |       0.00000 |       0.00079 |       0.00028 |       0.73962
     -0.00239 |       0.00000 |       0.00078 |       0.00030 |       0.73890
     -0.00245 |       0.00000 |       0.00076 |       0.00033 |       0.73929
Evaluating losses...
     -0.00253 |       0.00000 |       0.00073 |       0.00036 |     

     -0.00464 |       0.00000 |       0.00016 |       0.00058 |       0.76212
     -0.00507 |       0.00000 |       0.00015 |       0.00063 |       0.76221
     -0.00517 |       0.00000 |       0.00015 |       0.00062 |       0.76171
Evaluating losses...
     -0.00564 |       0.00000 |       0.00014 |       0.00067 |       0.76171
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.35          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6035          |
| TimeElapsed     | 2.43e+04      |
| TimestepsSoFar  | 16109568      |
| ev_tdlam_before | -0.431        |
| loss_ent        | 0.76171243    |
| loss_kl         | 0.00066685805 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0056379214 |
| loss_vf_loss    | 0.00014446811 |
-----------------------------------
********** Iteration 3933 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00059 |       0.00000 | 

********** Iteration 3938 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00062 |       0.00000 |       0.00113 |       0.00020 |       0.79590
     -0.00139 |       0.00000 |       0.00103 |       0.00033 |       0.79603
     -0.00182 |       0.00000 |       0.00098 |       0.00036 |       0.79573
     -0.00252 |       0.00000 |       0.00094 |       0.00042 |       0.79591
     -0.00265 |       0.00000 |       0.00092 |       0.00050 |       0.79585
     -0.00302 |       0.00000 |       0.00090 |       0.00047 |       0.79605
     -0.00320 |       0.00000 |       0.00088 |       0.00054 |       0.79575
     -0.00335 |       0.00000 |       0.00086 |       0.00053 |       0.79606
     -0.00362 |       0.00000 |       0.00085 |       0.00061 |       0.79605
     -0.00357 |       0.00000 |       0.00083 |       0.00063 |       0.79604
Evaluating losses...
     -0.00389 |       0.00000 |       0.00082 |       0.00067 |     

     -0.00340 |       0.00000 |      5.34e-05 |       0.00060 |       0.75624
     -0.00355 |       0.00000 |      5.23e-05 |       0.00060 |       0.75701
     -0.00369 |       0.00000 |      4.90e-05 |       0.00068 |       0.75675
Evaluating losses...
     -0.00397 |       0.00000 |      4.81e-05 |       0.00075 |       0.75702
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.36          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6050          |
| TimeElapsed     | 2.44e+04      |
| TimestepsSoFar  | 16154624      |
| ev_tdlam_before | -2.04         |
| loss_ent        | 0.75702053    |
| loss_kl         | 0.000752118   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.003970125  |
| loss_vf_loss    | 4.8112077e-05 |
-----------------------------------
********** Iteration 3944 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.64e-06 |       0.00000 | 

********** Iteration 3949 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00049 |       0.00000 |       0.00142 |       0.00027 |       0.75918
     -0.00083 |       0.00000 |       0.00127 |       0.00026 |       0.75834
     -0.00150 |       0.00000 |       0.00118 |       0.00045 |       0.75770
     -0.00188 |       0.00000 |       0.00110 |       0.00036 |       0.75827
     -0.00212 |       0.00000 |       0.00105 |       0.00044 |       0.75788
     -0.00233 |       0.00000 |       0.00100 |       0.00038 |       0.75720
     -0.00229 |       0.00000 |       0.00096 |       0.00045 |       0.75730
     -0.00258 |       0.00000 |       0.00093 |       0.00044 |       0.75778
     -0.00287 |       0.00000 |       0.00091 |       0.00049 |       0.75796
     -0.00278 |       0.00000 |       0.00089 |       0.00055 |       0.75776
Evaluating losses...
     -0.00327 |       0.00000 |       0.00086 |       0.00048 |     

     -0.00372 |       0.00000 |      3.35e-05 |       0.00040 |       0.76043
     -0.00390 |       0.00000 |      3.21e-05 |       0.00046 |       0.76016
     -0.00408 |       0.00000 |      3.10e-05 |       0.00044 |       0.76042
     -0.00427 |       0.00000 |      2.99e-05 |       0.00046 |       0.76042
Evaluating losses...
     -0.00492 |       0.00000 |      2.91e-05 |       0.00051 |       0.75971
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.38          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6065          |
| TimeElapsed     | 2.44e+04      |
| TimestepsSoFar  | 16199680      |
| ev_tdlam_before | -0.118        |
| loss_ent        | 0.7597111     |
| loss_kl         | 0.0005133482  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.004920776  |
| loss_vf_loss    | 2.9127792e-05 |
-----------------------------------
********** Iteration 3955 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 3960 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00011 |       0.00000 |       0.00014 |       0.00016 |       0.74464
     -0.00117 |       0.00000 |      9.66e-05 |       0.00038 |       0.74373
     -0.00145 |       0.00000 |      7.51e-05 |       0.00030 |       0.74485
     -0.00167 |       0.00000 |      6.30e-05 |       0.00026 |       0.74490
     -0.00183 |       0.00000 |      5.47e-05 |       0.00029 |       0.74482
     -0.00196 |       0.00000 |      5.00e-05 |       0.00030 |       0.74513
     -0.00213 |       0.00000 |      4.40e-05 |       0.00030 |       0.74508
     -0.00221 |       0.00000 |      4.01e-05 |       0.00030 |       0.74441
     -0.00235 |       0.00000 |      3.76e-05 |       0.00035 |       0.74447
     -0.00239 |       0.00000 |      3.48e-05 |       0.00042 |       0.74404
Evaluating losses...
     -0.00275 |       0.00000 |      3.33e-05 |       0.00038 |     

     -0.00240 |       0.00000 |      8.28e-05 |       0.00040 |       0.78699
     -0.00259 |       0.00000 |      7.38e-05 |       0.00043 |       0.78655
     -0.00271 |       0.00000 |      6.70e-05 |       0.00047 |       0.78673
Evaluating losses...
     -0.00293 |       0.00000 |      6.07e-05 |       0.00048 |       0.78695
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.33          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6080          |
| TimeElapsed     | 2.45e+04      |
| TimestepsSoFar  | 16244736      |
| ev_tdlam_before | -3.33         |
| loss_ent        | 0.7869491     |
| loss_kl         | 0.0004842507  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0029328861 |
| loss_vf_loss    | 6.0729704e-05 |
-----------------------------------
********** Iteration 3966 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00072 |       0.00000 | 

********** Iteration 3971 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00031 |       0.00000 |       0.00330 |       0.00073 |       0.77746
     -0.00166 |       0.00000 |       0.00294 |       0.00064 |       0.77899
     -0.00200 |       0.00000 |       0.00283 |       0.00055 |       0.77905
     -0.00237 |       0.00000 |       0.00277 |       0.00052 |       0.77852
     -0.00295 |       0.00000 |       0.00271 |       0.00043 |       0.77872
     -0.00305 |       0.00000 |       0.00267 |       0.00041 |       0.77910
     -0.00322 |       0.00000 |       0.00264 |       0.00051 |       0.77820
     -0.00324 |       0.00000 |       0.00261 |       0.00054 |       0.77855
     -0.00342 |       0.00000 |       0.00257 |       0.00055 |       0.77856
     -0.00326 |       0.00000 |       0.00255 |       0.00057 |       0.77825
Evaluating losses...
     -0.00376 |       0.00000 |       0.00249 |       0.00051 |     

     -0.00356 |       0.00000 |       0.00010 |       0.00047 |       0.80289
     -0.00342 |       0.00000 |      9.62e-05 |       0.00050 |       0.80272
     -0.00370 |       0.00000 |      9.19e-05 |       0.00054 |       0.80257
Evaluating losses...
     -0.00414 |       0.00000 |      8.90e-05 |       0.00054 |       0.80234
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.33          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6095          |
| TimeElapsed     | 2.47e+04      |
| TimestepsSoFar  | 16289792      |
| ev_tdlam_before | -0.697        |
| loss_ent        | 0.8023384     |
| loss_kl         | 0.00054144766 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0041417396 |
| loss_vf_loss    | 8.900714e-05  |
-----------------------------------
********** Iteration 3977 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     5.08e-05 |       0.00000 | 

********** Iteration 3982 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00013 |       0.00000 |      2.09e-05 |      8.57e-05 |       0.75565
     -0.00183 |       0.00000 |      1.63e-05 |       0.00014 |       0.75576
     -0.00249 |       0.00000 |      1.40e-05 |       0.00021 |       0.75602
     -0.00289 |       0.00000 |      1.24e-05 |       0.00026 |       0.75614
     -0.00332 |       0.00000 |      1.14e-05 |       0.00027 |       0.75570
     -0.00351 |       0.00000 |      1.05e-05 |       0.00032 |       0.75592
     -0.00368 |       0.00000 |      9.71e-06 |       0.00035 |       0.75614
     -0.00387 |       0.00000 |      9.31e-06 |       0.00037 |       0.75612
     -0.00412 |       0.00000 |      8.94e-06 |       0.00038 |       0.75616
     -0.00424 |       0.00000 |      8.63e-06 |       0.00042 |       0.75564
Evaluating losses...
     -0.00450 |       0.00000 |      8.38e-06 |       0.00042 |     

     -0.00253 |       0.00000 |       0.00180 |       0.00046 |       0.79864
     -0.00252 |       0.00000 |       0.00176 |       0.00049 |       0.79892
     -0.00263 |       0.00000 |       0.00170 |       0.00051 |       0.79848
Evaluating losses...
     -0.00272 |       0.00000 |       0.00166 |       0.00051 |       0.79872
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.3           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6110          |
| TimeElapsed     | 2.47e+04      |
| TimestepsSoFar  | 16334848      |
| ev_tdlam_before | 0.508         |
| loss_ent        | 0.79871756    |
| loss_kl         | 0.00051124406 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0027248035 |
| loss_vf_loss    | 0.0016624236  |
-----------------------------------
********** Iteration 3988 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00039 |       0.00000 | 

********** Iteration 3993 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -4.27e-05 |       0.00000 |       0.00106 |       0.00012 |       0.74131
     -0.00106 |       0.00000 |       0.00065 |       0.00019 |       0.74114
     -0.00128 |       0.00000 |       0.00055 |       0.00024 |       0.74084
     -0.00152 |       0.00000 |       0.00047 |       0.00027 |       0.74135
     -0.00166 |       0.00000 |       0.00042 |       0.00027 |       0.74162
     -0.00179 |       0.00000 |       0.00037 |       0.00031 |       0.74176
     -0.00194 |       0.00000 |       0.00035 |       0.00033 |       0.74162
     -0.00205 |       0.00000 |       0.00033 |       0.00036 |       0.74197
     -0.00209 |       0.00000 |       0.00032 |       0.00039 |       0.74185
     -0.00219 |       0.00000 |       0.00031 |       0.00039 |       0.74224
Evaluating losses...
     -0.00233 |       0.00000 |       0.00030 |       0.00040 |     

     -0.00251 |       0.00000 |       0.00070 |       0.00053 |       0.76527
     -0.00275 |       0.00000 |       0.00070 |       0.00057 |       0.76490
     -0.00280 |       0.00000 |       0.00069 |       0.00058 |       0.76467
Evaluating losses...
     -0.00279 |       0.00000 |       0.00068 |       0.00066 |       0.76456
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.3           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6125          |
| TimeElapsed     | 2.48e+04      |
| TimestepsSoFar  | 16379904      |
| ev_tdlam_before | 0.649         |
| loss_ent        | 0.76456434    |
| loss_kl         | 0.0006586804  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0027894187 |
| loss_vf_loss    | 0.0006822987  |
-----------------------------------
********** Iteration 3999 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 | 

********** Iteration 4004 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00051 |       0.00000 |       0.00128 |       0.00012 |       0.74928
     -0.00048 |       0.00000 |       0.00123 |       0.00022 |       0.74979
     -0.00088 |       0.00000 |       0.00121 |       0.00022 |       0.74879
     -0.00135 |       0.00000 |       0.00120 |       0.00022 |       0.74852
     -0.00138 |       0.00000 |       0.00120 |       0.00028 |       0.74846
     -0.00156 |       0.00000 |       0.00118 |       0.00030 |       0.74846
     -0.00193 |       0.00000 |       0.00118 |       0.00029 |       0.74815
     -0.00154 |       0.00000 |       0.00116 |       0.00032 |       0.74896
     -0.00190 |       0.00000 |       0.00115 |       0.00029 |       0.74846
     -0.00199 |       0.00000 |       0.00114 |       0.00032 |       0.74806
Evaluating losses...
     -0.00243 |       0.00000 |       0.00115 |       0.00037 |     

     -0.00225 |       0.00000 |       0.00121 |       0.00031 |       0.78772
     -0.00233 |       0.00000 |       0.00114 |       0.00043 |       0.78768
     -0.00247 |       0.00000 |       0.00108 |       0.00037 |       0.78823
Evaluating losses...
     -0.00268 |       0.00000 |       0.00104 |       0.00037 |       0.78847
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.35          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6140          |
| TimeElapsed     | 2.48e+04      |
| TimestepsSoFar  | 16424960      |
| ev_tdlam_before | 0.222         |
| loss_ent        | 0.7884706     |
| loss_kl         | 0.00037219812 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.002681714  |
| loss_vf_loss    | 0.0010351802  |
-----------------------------------
********** Iteration 4010 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00068 |       0.00000 | 

********** Iteration 4015 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00043 |       0.00000 |       0.00163 |       0.00011 |       0.75720
     -0.00134 |       0.00000 |       0.00129 |       0.00023 |       0.75695
     -0.00174 |       0.00000 |       0.00111 |       0.00030 |       0.75739
     -0.00222 |       0.00000 |       0.00100 |       0.00028 |       0.75736
     -0.00244 |       0.00000 |       0.00090 |       0.00032 |       0.75736
     -0.00248 |       0.00000 |       0.00082 |       0.00031 |       0.75776
     -0.00271 |       0.00000 |       0.00074 |       0.00034 |       0.75808
     -0.00277 |       0.00000 |       0.00070 |       0.00038 |       0.75791
     -0.00259 |       0.00000 |       0.00066 |       0.00045 |       0.75779
     -0.00291 |       0.00000 |       0.00062 |       0.00041 |       0.75872
Evaluating losses...
     -0.00303 |       0.00000 |       0.00060 |       0.00046 |     

     -0.00404 |       0.00000 |      5.48e-05 |       0.00037 |       0.75920
     -0.00409 |       0.00000 |      5.24e-05 |       0.00042 |       0.75937
     -0.00431 |       0.00000 |      5.07e-05 |       0.00044 |       0.75925
Evaluating losses...
     -0.00474 |       0.00000 |      5.03e-05 |       0.00044 |       0.75903
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.33          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6155          |
| TimeElapsed     | 2.49e+04      |
| TimestepsSoFar  | 16470016      |
| ev_tdlam_before | -0.657        |
| loss_ent        | 0.7590329     |
| loss_kl         | 0.00044183616 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0047361413 |
| loss_vf_loss    | 5.0274408e-05 |
-----------------------------------
********** Iteration 4021 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00038 |       0.00000 | 

********** Iteration 4026 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00044 |       0.00000 |       0.00138 |       0.00019 |       0.78300
     -0.00096 |       0.00000 |       0.00105 |       0.00028 |       0.78211
     -0.00136 |       0.00000 |       0.00099 |       0.00029 |       0.78250
     -0.00196 |       0.00000 |       0.00097 |       0.00038 |       0.78238
     -0.00218 |       0.00000 |       0.00095 |       0.00038 |       0.78258
     -0.00235 |       0.00000 |       0.00093 |       0.00041 |       0.78280
     -0.00253 |       0.00000 |       0.00092 |       0.00045 |       0.78260
     -0.00246 |       0.00000 |       0.00091 |       0.00047 |       0.78262
     -0.00277 |       0.00000 |       0.00090 |       0.00056 |       0.78274
     -0.00287 |       0.00000 |       0.00089 |       0.00052 |       0.78260
Evaluating losses...
     -0.00311 |       0.00000 |       0.00088 |       0.00055 |     

     -0.00228 |       0.00000 |       0.00108 |       0.00027 |       0.76493
     -0.00243 |       0.00000 |       0.00103 |       0.00028 |       0.76488
     -0.00257 |       0.00000 |       0.00099 |       0.00030 |       0.76505
     -0.00273 |       0.00000 |       0.00096 |       0.00032 |       0.76524
     -0.00287 |       0.00000 |       0.00093 |       0.00033 |       0.76496
Evaluating losses...
     -0.00307 |       0.00000 |       0.00090 |       0.00034 |       0.76519
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.33          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6170          |
| TimeElapsed     | 2.51e+04      |
| TimestepsSoFar  | 16515072      |
| ev_tdlam_before | 0.519         |
| loss_ent        | 0.7651887     |
| loss_kl         | 0.00034204716 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0030688876 |
| loss_vf_loss    | 0.00090150145 |
-----------------------------------
*******

********** Iteration 4037 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00017 |       0.00000 |       0.00150 |       0.00016 |       0.72917
     -0.00144 |       0.00000 |       0.00138 |       0.00030 |       0.73229
     -0.00201 |       0.00000 |       0.00134 |       0.00026 |       0.73155
     -0.00221 |       0.00000 |       0.00131 |       0.00023 |       0.73111
     -0.00238 |       0.00000 |       0.00127 |       0.00027 |       0.73065
     -0.00231 |       0.00000 |       0.00126 |       0.00028 |       0.73017
     -0.00254 |       0.00000 |       0.00124 |       0.00031 |       0.73031
     -0.00269 |       0.00000 |       0.00124 |       0.00030 |       0.73017
     -0.00280 |       0.00000 |       0.00123 |       0.00033 |       0.73045
     -0.00286 |       0.00000 |       0.00122 |       0.00033 |       0.73044
Evaluating losses...
     -0.00289 |       0.00000 |       0.00120 |       0.00034 |     

     -0.00259 |       0.00000 |       0.00144 |       0.00036 |       0.77108
     -0.00269 |       0.00000 |       0.00139 |       0.00038 |       0.77131
     -0.00283 |       0.00000 |       0.00133 |       0.00040 |       0.77104
Evaluating losses...
     -0.00304 |       0.00000 |       0.00130 |       0.00041 |       0.77097
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.41          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6185          |
| TimeElapsed     | 2.51e+04      |
| TimestepsSoFar  | 16560128      |
| ev_tdlam_before | 0.0123        |
| loss_ent        | 0.7709701     |
| loss_kl         | 0.0004095644  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0030379803 |
| loss_vf_loss    | 0.0012995076  |
-----------------------------------
********** Iteration 4043 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00012 |       0.00000 | 

********** Iteration 4048 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -8.48e-05 |       0.00000 |       0.00447 |      9.64e-05 |       0.77700
     -0.00189 |       0.00000 |       0.00329 |       0.00019 |       0.77741
     -0.00244 |       0.00000 |       0.00299 |       0.00020 |       0.77721
     -0.00270 |       0.00000 |       0.00282 |       0.00025 |       0.77662
     -0.00285 |       0.00000 |       0.00270 |       0.00025 |       0.77683
     -0.00306 |       0.00000 |       0.00261 |       0.00030 |       0.77668
     -0.00317 |       0.00000 |       0.00255 |       0.00028 |       0.77694
     -0.00340 |       0.00000 |       0.00251 |       0.00032 |       0.77651
     -0.00358 |       0.00000 |       0.00246 |       0.00034 |       0.77703
     -0.00351 |       0.00000 |       0.00241 |       0.00038 |       0.77703
Evaluating losses...
     -0.00384 |       0.00000 |       0.00237 |       0.00039 |     

     -0.00179 |       0.00000 |       0.00146 |       0.00031 |       0.73250
     -0.00213 |       0.00000 |       0.00142 |       0.00033 |       0.73228
     -0.00213 |       0.00000 |       0.00138 |       0.00031 |       0.73244
Evaluating losses...
     -0.00233 |       0.00000 |       0.00135 |       0.00035 |       0.73196
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.38         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 6200         |
| TimeElapsed     | 2.52e+04     |
| TimestepsSoFar  | 16605184     |
| ev_tdlam_before | 0.218        |
| loss_ent        | 0.7319602    |
| loss_kl         | 0.000352903  |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.002326629 |
| loss_vf_loss    | 0.0013532032 |
----------------------------------
********** Iteration 4054 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00017 |       0.00000 |       0.00032 

********** Iteration 4059 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00042 |       0.00000 |      5.26e-05 |       0.00023 |       0.77870
     -0.00111 |       0.00000 |      4.13e-05 |       0.00028 |       0.77846
     -0.00216 |       0.00000 |      3.66e-05 |       0.00023 |       0.77850
     -0.00261 |       0.00000 |      3.33e-05 |       0.00024 |       0.77798
     -0.00304 |       0.00000 |      3.06e-05 |       0.00032 |       0.77808
     -0.00325 |       0.00000 |      2.88e-05 |       0.00029 |       0.77819
     -0.00340 |       0.00000 |      2.73e-05 |       0.00036 |       0.77825
     -0.00350 |       0.00000 |      2.56e-05 |       0.00034 |       0.77839
     -0.00378 |       0.00000 |      2.44e-05 |       0.00034 |       0.77811
     -0.00348 |       0.00000 |      2.34e-05 |       0.00043 |       0.77820
Evaluating losses...
     -0.00393 |       0.00000 |      2.24e-05 |       0.00042 |     

     -0.00415 |       0.00000 |      4.21e-05 |       0.00040 |       0.76771
     -0.00405 |       0.00000 |      3.95e-05 |       0.00042 |       0.76743
     -0.00435 |       0.00000 |      3.76e-05 |       0.00047 |       0.76762
Evaluating losses...
     -0.00497 |       0.00000 |      3.70e-05 |       0.00049 |       0.76766
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.38          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6215          |
| TimeElapsed     | 2.52e+04      |
| TimestepsSoFar  | 16650240      |
| ev_tdlam_before | -1.52         |
| loss_ent        | 0.7676614     |
| loss_kl         | 0.0004916938  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.00496578   |
| loss_vf_loss    | 3.7005033e-05 |
-----------------------------------
********** Iteration 4065 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00041 |       0.00000 | 

********** Iteration 4070 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -3.71e-05 |       0.00000 |       0.00305 |       0.00020 |       0.77515
     -0.00148 |       0.00000 |       0.00255 |       0.00034 |       0.77671
     -0.00190 |       0.00000 |       0.00240 |       0.00034 |       0.77618
     -0.00220 |       0.00000 |       0.00234 |       0.00036 |       0.77630
     -0.00236 |       0.00000 |       0.00230 |       0.00053 |       0.77647
     -0.00259 |       0.00000 |       0.00224 |       0.00053 |       0.77611
     -0.00267 |       0.00000 |       0.00219 |       0.00051 |       0.77632
     -0.00288 |       0.00000 |       0.00217 |       0.00042 |       0.77577
     -0.00304 |       0.00000 |       0.00213 |       0.00047 |       0.77616
     -0.00306 |       0.00000 |       0.00208 |       0.00046 |       0.77586
Evaluating losses...
     -0.00315 |       0.00000 |       0.00206 |       0.00070 |     

     -0.00274 |       0.00000 |       0.00160 |       0.00059 |       0.76539
     -0.00283 |       0.00000 |       0.00157 |       0.00055 |       0.76526
     -0.00298 |       0.00000 |       0.00156 |       0.00060 |       0.76567
Evaluating losses...
     -0.00326 |       0.00000 |       0.00153 |       0.00062 |       0.76563
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.34          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6230          |
| TimeElapsed     | 2.52e+04      |
| TimestepsSoFar  | 16695296      |
| ev_tdlam_before | 0.719         |
| loss_ent        | 0.7656254     |
| loss_kl         | 0.0006244476  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0032554488 |
| loss_vf_loss    | 0.0015314525  |
-----------------------------------
********** Iteration 4076 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00073 |       0.00000 | 

********** Iteration 4081 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00081 |       0.00000 |       0.00148 |       0.00023 |       0.77038
     -0.00054 |       0.00000 |       0.00144 |       0.00032 |       0.77025
     -0.00080 |       0.00000 |       0.00141 |       0.00038 |       0.76970
     -0.00117 |       0.00000 |       0.00139 |       0.00024 |       0.76976
     -0.00144 |       0.00000 |       0.00137 |       0.00025 |       0.76971
     -0.00160 |       0.00000 |       0.00135 |       0.00021 |       0.76947
     -0.00170 |       0.00000 |       0.00134 |       0.00021 |       0.76915
     -0.00176 |       0.00000 |       0.00132 |       0.00024 |       0.76955
     -0.00199 |       0.00000 |       0.00130 |       0.00022 |       0.76946
     -0.00185 |       0.00000 |       0.00130 |       0.00025 |       0.76923
Evaluating losses...
     -0.00212 |       0.00000 |       0.00132 |       0.00023 |     

     -0.00284 |       0.00000 |       0.00087 |       0.00026 |       0.79207
     -0.00295 |       0.00000 |       0.00082 |       0.00027 |       0.79179
     -0.00301 |       0.00000 |       0.00080 |       0.00028 |       0.79226
Evaluating losses...
     -0.00317 |       0.00000 |       0.00077 |       0.00029 |       0.79234
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.31          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6245          |
| TimeElapsed     | 2.53e+04      |
| TimestepsSoFar  | 16740352      |
| ev_tdlam_before | 0.625         |
| loss_ent        | 0.7923361     |
| loss_kl         | 0.00029205516 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.00316899   |
| loss_vf_loss    | 0.0007654272  |
-----------------------------------
********** Iteration 4087 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00070 |       0.00000 | 

********** Iteration 4092 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00078 |       0.00000 |       0.00198 |       0.00108 |       0.77354
     -0.00067 |       0.00000 |       0.00182 |       0.00068 |       0.77400
     -0.00156 |       0.00000 |       0.00178 |       0.00053 |       0.77439
     -0.00180 |       0.00000 |       0.00174 |       0.00044 |       0.77510
     -0.00204 |       0.00000 |       0.00169 |       0.00046 |       0.77523
     -0.00224 |       0.00000 |       0.00168 |       0.00043 |       0.77546
     -0.00232 |       0.00000 |       0.00166 |       0.00044 |       0.77573
     -0.00241 |       0.00000 |       0.00164 |       0.00047 |       0.77543
     -0.00248 |       0.00000 |       0.00162 |       0.00051 |       0.77568
     -0.00266 |       0.00000 |       0.00161 |       0.00049 |       0.77554
Evaluating losses...
     -0.00280 |       0.00000 |       0.00160 |       0.00051 |     

     -0.00211 |       0.00000 |       0.00229 |       0.00041 |       0.82034
     -0.00205 |       0.00000 |       0.00225 |       0.00046 |       0.81976
     -0.00219 |       0.00000 |       0.00221 |       0.00045 |       0.82036
Evaluating losses...
     -0.00231 |       0.00000 |       0.00216 |       0.00051 |       0.81957
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.29          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6259          |
| TimeElapsed     | 2.55e+04      |
| TimestepsSoFar  | 16785408      |
| ev_tdlam_before | 0.407         |
| loss_ent        | 0.81956804    |
| loss_kl         | 0.0005051474  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0023106472 |
| loss_vf_loss    | 0.0021634982  |
-----------------------------------
********** Iteration 4098 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00110 |       0.00000 | 

********** Iteration 4103 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00065 |       0.00000 |       0.00012 |       0.00012 |       0.78164
     -0.00204 |       0.00000 |      7.08e-05 |       0.00024 |       0.78136
     -0.00239 |       0.00000 |      6.59e-05 |       0.00027 |       0.78198
     -0.00266 |       0.00000 |      6.16e-05 |       0.00028 |       0.78149
     -0.00287 |       0.00000 |      6.06e-05 |       0.00031 |       0.78167
     -0.00296 |       0.00000 |      5.94e-05 |       0.00034 |       0.78157
     -0.00303 |       0.00000 |      5.69e-05 |       0.00040 |       0.78151
     -0.00313 |       0.00000 |      5.50e-05 |       0.00041 |       0.78172
     -0.00320 |       0.00000 |      5.41e-05 |       0.00043 |       0.78141
     -0.00334 |       0.00000 |      5.30e-05 |       0.00042 |       0.78164
Evaluating losses...
     -0.00348 |       0.00000 |      5.19e-05 |       0.00043 |     

     -0.00242 |       0.00000 |       0.00127 |       0.00032 |       0.75446
     -0.00242 |       0.00000 |       0.00126 |       0.00034 |       0.75445
     -0.00260 |       0.00000 |       0.00123 |       0.00039 |       0.75475
Evaluating losses...
     -0.00283 |       0.00000 |       0.00120 |       0.00040 |       0.75511
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.17          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6274          |
| TimeElapsed     | 2.55e+04      |
| TimestepsSoFar  | 16830464      |
| ev_tdlam_before | 0.684         |
| loss_ent        | 0.755107      |
| loss_kl         | 0.00039854238 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0028320309 |
| loss_vf_loss    | 0.0012009287  |
-----------------------------------
********** Iteration 4109 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00029 |       0.00000 | 

********** Iteration 4114 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00047 |       0.00000 |       0.00147 |      9.81e-05 |       0.76804
     -0.00062 |       0.00000 |       0.00132 |       0.00014 |       0.76791
     -0.00135 |       0.00000 |       0.00126 |       0.00022 |       0.76806
     -0.00179 |       0.00000 |       0.00120 |       0.00022 |       0.76740
     -0.00175 |       0.00000 |       0.00117 |       0.00024 |       0.76749
     -0.00193 |       0.00000 |       0.00113 |       0.00026 |       0.76745
     -0.00173 |       0.00000 |       0.00111 |       0.00031 |       0.76762
     -0.00203 |       0.00000 |       0.00107 |       0.00034 |       0.76797
     -0.00232 |       0.00000 |       0.00104 |       0.00033 |       0.76727
     -0.00205 |       0.00000 |       0.00101 |       0.00039 |       0.76802
Evaluating losses...
     -0.00235 |       0.00000 |       0.00099 |       0.00039 |     

     -0.00249 |       0.00000 |       0.00269 |       0.00041 |       0.77824
     -0.00256 |       0.00000 |       0.00263 |       0.00042 |       0.77750
     -0.00248 |       0.00000 |       0.00259 |       0.00052 |       0.77824
Evaluating losses...
     -0.00278 |       0.00000 |       0.00253 |       0.00042 |       0.77796
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.17          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6289          |
| TimeElapsed     | 2.56e+04      |
| TimestepsSoFar  | 16875520      |
| ev_tdlam_before | 0.37          |
| loss_ent        | 0.77795696    |
| loss_kl         | 0.00041693557 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.002779588  |
| loss_vf_loss    | 0.0025349197  |
-----------------------------------
********** Iteration 4120 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00083 |       0.00000 | 

********** Iteration 4125 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00014 |       0.00000 |       0.00038 |       0.00010 |       0.77985
     -0.00108 |       0.00000 |       0.00025 |       0.00013 |       0.78058
     -0.00144 |       0.00000 |       0.00019 |       0.00012 |       0.78061
     -0.00168 |       0.00000 |       0.00015 |       0.00018 |       0.78039
     -0.00186 |       0.00000 |       0.00013 |       0.00020 |       0.78030
     -0.00209 |       0.00000 |       0.00011 |       0.00020 |       0.78005
     -0.00222 |       0.00000 |       0.00010 |       0.00022 |       0.78019
     -0.00230 |       0.00000 |      9.24e-05 |       0.00024 |       0.78031
     -0.00241 |       0.00000 |      8.66e-05 |       0.00026 |       0.78037
     -0.00255 |       0.00000 |      8.13e-05 |       0.00025 |       0.78013
Evaluating losses...
     -0.00275 |       0.00000 |      8.08e-05 |       0.00025 |     

     -0.00323 |       0.00000 |       0.00070 |       0.00031 |       0.77452
     -0.00344 |       0.00000 |       0.00069 |       0.00032 |       0.77426
     -0.00345 |       0.00000 |       0.00068 |       0.00034 |       0.77439
Evaluating losses...
     -0.00365 |       0.00000 |       0.00067 |       0.00037 |       0.77411
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.24          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6304          |
| TimeElapsed     | 2.56e+04      |
| TimestepsSoFar  | 16920576      |
| ev_tdlam_before | 0.78          |
| loss_ent        | 0.7741062     |
| loss_kl         | 0.00037068315 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0036473286 |
| loss_vf_loss    | 0.0006744107  |
-----------------------------------
********** Iteration 4131 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.36e-05 |       0.00000 | 

********** Iteration 4136 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     5.40e-05 |       0.00000 |       0.00308 |       0.00051 |       0.76479
     -0.00131 |       0.00000 |       0.00302 |       0.00060 |       0.76432
     -0.00187 |       0.00000 |       0.00297 |       0.00076 |       0.76489
     -0.00217 |       0.00000 |       0.00296 |       0.00053 |       0.76526
     -0.00236 |       0.00000 |       0.00294 |       0.00051 |       0.76497
     -0.00254 |       0.00000 |       0.00294 |       0.00042 |       0.76433
     -0.00268 |       0.00000 |       0.00293 |       0.00043 |       0.76507
     -0.00283 |       0.00000 |       0.00292 |       0.00043 |       0.76468
     -0.00277 |       0.00000 |       0.00291 |       0.00066 |       0.76483
     -0.00290 |       0.00000 |       0.00291 |       0.00065 |       0.76536
Evaluating losses...
     -0.00306 |       0.00000 |       0.00288 |       0.00066 |     

     -0.00333 |       0.00000 |      6.36e-05 |       0.00028 |       0.78447
     -0.00347 |       0.00000 |      6.02e-05 |       0.00028 |       0.78461
     -0.00362 |       0.00000 |      5.71e-05 |       0.00031 |       0.78426
Evaluating losses...
     -0.00411 |       0.00000 |      5.49e-05 |       0.00033 |       0.78414
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.24          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6319          |
| TimeElapsed     | 2.56e+04      |
| TimestepsSoFar  | 16965632      |
| ev_tdlam_before | -2.45         |
| loss_ent        | 0.7841401     |
| loss_kl         | 0.00033185346 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.004111982  |
| loss_vf_loss    | 5.4942324e-05 |
-----------------------------------
********** Iteration 4142 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00022 |       0.00000 | 

********** Iteration 4147 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.64e-06 |       0.00000 |       0.00216 |      9.97e-05 |       0.80372
     -0.00101 |       0.00000 |       0.00142 |       0.00017 |       0.80471
     -0.00149 |       0.00000 |       0.00113 |       0.00017 |       0.80467
     -0.00149 |       0.00000 |       0.00096 |       0.00021 |       0.80464
     -0.00163 |       0.00000 |       0.00083 |       0.00021 |       0.80481
     -0.00184 |       0.00000 |       0.00075 |       0.00021 |       0.80486
     -0.00197 |       0.00000 |       0.00068 |       0.00024 |       0.80484
     -0.00214 |       0.00000 |       0.00064 |       0.00026 |       0.80478
     -0.00203 |       0.00000 |       0.00060 |       0.00029 |       0.80467
     -0.00225 |       0.00000 |       0.00057 |       0.00031 |       0.80474
Evaluating losses...
     -0.00250 |       0.00000 |       0.00054 |       0.00031 |     

     -0.00378 |       0.00000 |      5.09e-05 |       0.00029 |       0.79652
     -0.00390 |       0.00000 |      4.81e-05 |       0.00028 |       0.79670
     -0.00404 |       0.00000 |      4.60e-05 |       0.00034 |       0.79642
     -0.00429 |       0.00000 |      4.40e-05 |       0.00033 |       0.79638
Evaluating losses...
     -0.00463 |       0.00000 |      4.29e-05 |       0.00034 |       0.79693
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.27          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6334          |
| TimeElapsed     | 2.58e+04      |
| TimestepsSoFar  | 17010688      |
| ev_tdlam_before | -0.908        |
| loss_ent        | 0.79693353    |
| loss_kl         | 0.00034091898 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.004634435  |
| loss_vf_loss    | 4.290577e-05  |
-----------------------------------
********** Iteration 4153 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 4158 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -8.34e-05 |       0.00000 |       0.00245 |      7.75e-05 |       0.77334
     -0.00107 |       0.00000 |       0.00197 |       0.00016 |       0.77216
     -0.00147 |       0.00000 |       0.00167 |       0.00018 |       0.77257
     -0.00165 |       0.00000 |       0.00148 |       0.00017 |       0.77298
     -0.00165 |       0.00000 |       0.00138 |       0.00019 |       0.77281
     -0.00191 |       0.00000 |       0.00132 |       0.00020 |       0.77309
     -0.00192 |       0.00000 |       0.00128 |       0.00021 |       0.77305
     -0.00202 |       0.00000 |       0.00124 |       0.00022 |       0.77295
     -0.00203 |       0.00000 |       0.00122 |       0.00026 |       0.77351
     -0.00210 |       0.00000 |       0.00118 |       0.00027 |       0.77332
Evaluating losses...
     -0.00238 |       0.00000 |       0.00116 |       0.00026 |     

     -0.00253 |       0.00000 |       0.00145 |       0.00029 |       0.79567
     -0.00270 |       0.00000 |       0.00140 |       0.00032 |       0.79600
     -0.00285 |       0.00000 |       0.00136 |       0.00033 |       0.79561
Evaluating losses...
     -0.00308 |       0.00000 |       0.00131 |       0.00034 |       0.79534
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.34          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6349          |
| TimeElapsed     | 2.59e+04      |
| TimestepsSoFar  | 17055744      |
| ev_tdlam_before | 0.335         |
| loss_ent        | 0.7953386     |
| loss_kl         | 0.00034104346 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0030801636 |
| loss_vf_loss    | 0.0013139182  |
-----------------------------------
********** Iteration 4164 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00052 |       0.00000 | 

********** Iteration 4169 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     8.13e-05 |       0.00000 |       0.00441 |       0.00018 |       0.81250
     -0.00115 |       0.00000 |       0.00414 |       0.00031 |       0.81209
     -0.00142 |       0.00000 |       0.00406 |       0.00084 |       0.81118
     -0.00195 |       0.00000 |       0.00398 |       0.00053 |       0.81146
     -0.00224 |       0.00000 |       0.00392 |       0.00036 |       0.81134
     -0.00244 |       0.00000 |       0.00390 |       0.00037 |       0.81121
     -0.00257 |       0.00000 |       0.00388 |       0.00031 |       0.81177
     -0.00270 |       0.00000 |       0.00386 |       0.00036 |       0.81199
     -0.00279 |       0.00000 |       0.00385 |       0.00033 |       0.81176
     -0.00287 |       0.00000 |       0.00383 |       0.00037 |       0.81221
Evaluating losses...
     -0.00298 |       0.00000 |       0.00382 |       0.00044 |     

     -0.00377 |       0.00000 |      1.47e-05 |       0.00031 |       0.78715
     -0.00390 |       0.00000 |      1.42e-05 |       0.00035 |       0.78736
     -0.00410 |       0.00000 |      1.39e-05 |       0.00035 |       0.78738
Evaluating losses...
     -0.00456 |       0.00000 |      1.40e-05 |       0.00036 |       0.78773
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.38          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6364          |
| TimeElapsed     | 2.59e+04      |
| TimestepsSoFar  | 17100800      |
| ev_tdlam_before | -0.431        |
| loss_ent        | 0.7877327     |
| loss_kl         | 0.00036462813 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0045575397 |
| loss_vf_loss    | 1.3982147e-05 |
-----------------------------------
********** Iteration 4175 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -8.37e-05 |       0.00000 | 

********** Iteration 4180 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00071 |       0.00000 |       0.00225 |      9.01e-05 |       0.81119
     -0.00077 |       0.00000 |       0.00152 |       0.00015 |       0.81134
     -0.00107 |       0.00000 |       0.00138 |       0.00018 |       0.81099
     -0.00124 |       0.00000 |       0.00136 |       0.00024 |       0.81132
     -0.00160 |       0.00000 |       0.00133 |       0.00027 |       0.81081
     -0.00152 |       0.00000 |       0.00132 |       0.00030 |       0.81067
     -0.00189 |       0.00000 |       0.00130 |       0.00035 |       0.81083
     -0.00179 |       0.00000 |       0.00129 |       0.00039 |       0.81039
     -0.00207 |       0.00000 |       0.00128 |       0.00039 |       0.80987
     -0.00209 |       0.00000 |       0.00127 |       0.00042 |       0.81008
Evaluating losses...
     -0.00230 |       0.00000 |       0.00124 |       0.00043 |     

     -0.00379 |       0.00000 |      4.95e-05 |       0.00034 |       0.78401
     -0.00402 |       0.00000 |      4.78e-05 |       0.00036 |       0.78412
     -0.00417 |       0.00000 |      4.64e-05 |       0.00038 |       0.78371
Evaluating losses...
     -0.00460 |       0.00000 |      4.56e-05 |       0.00038 |       0.78380
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.4           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6379          |
| TimeElapsed     | 2.59e+04      |
| TimestepsSoFar  | 17145856      |
| ev_tdlam_before | -0.413        |
| loss_ent        | 0.78380126    |
| loss_kl         | 0.0003762557  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0045999503 |
| loss_vf_loss    | 4.5551555e-05 |
-----------------------------------
********** Iteration 4186 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.55e-06 |       0.00000 | 

********** Iteration 4191 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00189 |       0.00000 |      7.93e-05 |       0.00022 |       0.75481
      0.00068 |       0.00000 |      5.58e-05 |       0.00023 |       0.75343
     -0.00109 |       0.00000 |      4.55e-05 |       0.00015 |       0.75467
      0.00015 |       0.00000 |      3.94e-05 |       0.00030 |       0.75430
     -0.00138 |       0.00000 |      3.40e-05 |       0.00027 |       0.75482
     -0.00078 |       0.00000 |      3.10e-05 |       0.00026 |       0.75407
     -0.00116 |       0.00000 |      2.81e-05 |       0.00024 |       0.75420
     -0.00140 |       0.00000 |      2.54e-05 |       0.00029 |       0.75512
     -0.00158 |       0.00000 |      2.36e-05 |       0.00031 |       0.75426
     -0.00224 |       0.00000 |      2.19e-05 |       0.00030 |       0.75494
Evaluating losses...
     -0.00199 |       0.00000 |      2.09e-05 |       0.00028 |     

     -0.00302 |       0.00000 |       0.00207 |       0.00027 |       0.80258
     -0.00311 |       0.00000 |       0.00203 |       0.00031 |       0.80245
     -0.00322 |       0.00000 |       0.00200 |       0.00031 |       0.80183
Evaluating losses...
     -0.00355 |       0.00000 |       0.00197 |       0.00030 |       0.80268
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.36          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6394          |
| TimeElapsed     | 2.6e+04       |
| TimestepsSoFar  | 17190912      |
| ev_tdlam_before | 0.413         |
| loss_ent        | 0.80268383    |
| loss_kl         | 0.00029938537 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0035492147 |
| loss_vf_loss    | 0.001967703   |
-----------------------------------
********** Iteration 4197 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00035 |       0.00000 | 

********** Iteration 4202 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00027 |       0.00000 |       0.00200 |       0.00030 |       0.75750
     -0.00085 |       0.00000 |       0.00197 |       0.00041 |       0.75680
     -0.00109 |       0.00000 |       0.00194 |       0.00025 |       0.75657
     -0.00125 |       0.00000 |       0.00190 |       0.00033 |       0.75672
     -0.00137 |       0.00000 |       0.00187 |       0.00020 |       0.75764
     -0.00142 |       0.00000 |       0.00186 |       0.00024 |       0.75751
     -0.00153 |       0.00000 |       0.00184 |       0.00019 |       0.75782
     -0.00158 |       0.00000 |       0.00181 |       0.00029 |       0.75762
     -0.00156 |       0.00000 |       0.00181 |       0.00030 |       0.75807
     -0.00160 |       0.00000 |       0.00179 |       0.00027 |       0.75777
Evaluating losses...
     -0.00179 |       0.00000 |       0.00176 |       0.00023 |     

     -0.00185 |       0.00000 |       0.00120 |       0.00024 |       0.77679
     -0.00199 |       0.00000 |       0.00117 |       0.00027 |       0.77727
     -0.00190 |       0.00000 |       0.00116 |       0.00029 |       0.77729
Evaluating losses...
     -0.00218 |       0.00000 |       0.00114 |       0.00030 |       0.77695
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.29          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6409          |
| TimeElapsed     | 2.6e+04       |
| TimestepsSoFar  | 17235968      |
| ev_tdlam_before | 0.409         |
| loss_ent        | 0.7769481     |
| loss_kl         | 0.00029641628 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0021783689 |
| loss_vf_loss    | 0.0011389432  |
-----------------------------------
********** Iteration 4208 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00028 |       0.00000 | 

********** Iteration 4213 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00078 |       0.00000 |      7.45e-05 |       0.00019 |       0.74245
     -0.00097 |       0.00000 |      6.13e-05 |       0.00022 |       0.74236
     -0.00151 |       0.00000 |      5.53e-05 |       0.00017 |       0.74183
     -0.00198 |       0.00000 |      5.21e-05 |       0.00017 |       0.74124
     -0.00230 |       0.00000 |      4.94e-05 |       0.00021 |       0.74129
     -0.00235 |       0.00000 |      4.72e-05 |       0.00023 |       0.74127
     -0.00262 |       0.00000 |      4.54e-05 |       0.00025 |       0.74173
     -0.00274 |       0.00000 |      4.41e-05 |       0.00028 |       0.74155
     -0.00295 |       0.00000 |      4.26e-05 |       0.00026 |       0.74144
     -0.00297 |       0.00000 |      4.14e-05 |       0.00027 |       0.74149
Evaluating losses...
     -0.00307 |       0.00000 |      4.02e-05 |       0.00029 |     

     -0.00308 |       0.00000 |      1.67e-05 |       0.00023 |       0.76474
     -0.00323 |       0.00000 |      1.59e-05 |       0.00026 |       0.76508
     -0.00329 |       0.00000 |      1.55e-05 |       0.00028 |       0.76444
Evaluating losses...
     -0.00329 |       0.00000 |      1.53e-05 |       0.00033 |       0.76411
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.31          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6424          |
| TimeElapsed     | 2.62e+04      |
| TimestepsSoFar  | 17281024      |
| ev_tdlam_before | -2.8          |
| loss_ent        | 0.7641062     |
| loss_kl         | 0.00032600286 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.003289805  |
| loss_vf_loss    | 1.5334545e-05 |
-----------------------------------
********** Iteration 4219 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00020 |       0.00000 | 

********** Iteration 4224 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     7.52e-05 |       0.00000 |       0.00016 |      8.77e-05 |       0.78669
     -0.00131 |       0.00000 |       0.00013 |       0.00018 |       0.78671
     -0.00223 |       0.00000 |       0.00012 |       0.00017 |       0.78686
     -0.00246 |       0.00000 |       0.00011 |       0.00020 |       0.78681
     -0.00291 |       0.00000 |      9.97e-05 |       0.00021 |       0.78700
     -0.00319 |       0.00000 |      9.26e-05 |       0.00025 |       0.78713
     -0.00339 |       0.00000 |      8.71e-05 |       0.00026 |       0.78737
     -0.00358 |       0.00000 |      8.20e-05 |       0.00030 |       0.78752
     -0.00374 |       0.00000 |      7.79e-05 |       0.00030 |       0.78723
     -0.00386 |       0.00000 |      7.43e-05 |       0.00032 |       0.78741
Evaluating losses...
     -0.00419 |       0.00000 |      7.24e-05 |       0.00032 |     

     -0.00296 |       0.00000 |      9.04e-05 |       0.00033 |       0.75245
     -0.00308 |       0.00000 |      8.36e-05 |       0.00037 |       0.75214
     -0.00317 |       0.00000 |      7.84e-05 |       0.00040 |       0.75246
Evaluating losses...
     -0.00346 |       0.00000 |      7.56e-05 |       0.00041 |       0.75248
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.31          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6439          |
| TimeElapsed     | 2.63e+04      |
| TimestepsSoFar  | 17326080      |
| ev_tdlam_before | -0.99         |
| loss_ent        | 0.75248355    |
| loss_kl         | 0.0004118006  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0034641502 |
| loss_vf_loss    | 7.558909e-05  |
-----------------------------------
********** Iteration 4230 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00047 |       0.00000 | 

********** Iteration 4235 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00015 |       0.00000 |       0.00192 |       0.00020 |       0.76537
     -0.00067 |       0.00000 |       0.00188 |       0.00017 |       0.76557
     -0.00121 |       0.00000 |       0.00184 |       0.00011 |       0.76508
     -0.00135 |       0.00000 |       0.00182 |       0.00014 |       0.76509
     -0.00149 |       0.00000 |       0.00178 |       0.00016 |       0.76548
     -0.00153 |       0.00000 |       0.00176 |       0.00021 |       0.76555
     -0.00163 |       0.00000 |       0.00175 |       0.00019 |       0.76545
     -0.00177 |       0.00000 |       0.00172 |       0.00020 |       0.76557
     -0.00183 |       0.00000 |       0.00170 |       0.00020 |       0.76534
     -0.00190 |       0.00000 |       0.00169 |       0.00023 |       0.76511
Evaluating losses...
     -0.00179 |       0.00000 |       0.00168 |       0.00026 |     

     -0.00154 |       0.00000 |       0.00030 |       0.00019 |       0.78541
     -0.00157 |       0.00000 |       0.00030 |       0.00021 |       0.78517
     -0.00164 |       0.00000 |       0.00029 |       0.00021 |       0.78520
Evaluating losses...
     -0.00173 |       0.00000 |       0.00029 |       0.00021 |       0.78521
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.2           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6454          |
| TimeElapsed     | 2.63e+04      |
| TimestepsSoFar  | 17371136      |
| ev_tdlam_before | 0.454         |
| loss_ent        | 0.7852066     |
| loss_kl         | 0.00021469411 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0017320139 |
| loss_vf_loss    | 0.00029050326 |
-----------------------------------
********** Iteration 4241 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00034 |       0.00000 | 

********** Iteration 4246 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |       0.00000 |       0.00154 |      4.62e-05 |       0.77516
     -0.00095 |       0.00000 |       0.00121 |      8.73e-05 |       0.77531
     -0.00125 |       0.00000 |       0.00102 |       0.00010 |       0.77541
     -0.00151 |       0.00000 |       0.00091 |       0.00012 |       0.77545
     -0.00173 |       0.00000 |       0.00083 |       0.00012 |       0.77525
     -0.00189 |       0.00000 |       0.00077 |       0.00016 |       0.77510
     -0.00214 |       0.00000 |       0.00072 |       0.00015 |       0.77514
     -0.00216 |       0.00000 |       0.00069 |       0.00019 |       0.77508
     -0.00229 |       0.00000 |       0.00066 |       0.00020 |       0.77498
     -0.00235 |       0.00000 |       0.00064 |       0.00021 |       0.77548
Evaluating losses...
     -0.00258 |       0.00000 |       0.00062 |       0.00024 |     

     -0.00171 |       0.00000 |       0.00060 |       0.00019 |       0.79398
     -0.00175 |       0.00000 |       0.00059 |       0.00021 |       0.79383
     -0.00189 |       0.00000 |       0.00059 |       0.00021 |       0.79405
Evaluating losses...
     -0.00207 |       0.00000 |       0.00057 |       0.00021 |       0.79372
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.26          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6469          |
| TimeElapsed     | 2.63e+04      |
| TimestepsSoFar  | 17416192      |
| ev_tdlam_before | 0.723         |
| loss_ent        | 0.793716      |
| loss_kl         | 0.00021487923 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.002066122  |
| loss_vf_loss    | 0.00057493564 |
-----------------------------------
********** Iteration 4252 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00017 |       0.00000 | 

********** Iteration 4257 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00026 |       0.00000 |      5.11e-05 |      8.61e-05 |       0.73424
     -0.00137 |       0.00000 |      4.56e-05 |       0.00013 |       0.73405
     -0.00212 |       0.00000 |      4.26e-05 |       0.00017 |       0.73381
     -0.00244 |       0.00000 |      4.06e-05 |       0.00018 |       0.73377
     -0.00261 |       0.00000 |      3.88e-05 |       0.00022 |       0.73386
     -0.00289 |       0.00000 |      3.74e-05 |       0.00024 |       0.73427
     -0.00316 |       0.00000 |      3.62e-05 |       0.00023 |       0.73392
     -0.00322 |       0.00000 |      3.47e-05 |       0.00027 |       0.73374
     -0.00345 |       0.00000 |      3.39e-05 |       0.00025 |       0.73386
     -0.00351 |       0.00000 |      3.29e-05 |       0.00027 |       0.73380
Evaluating losses...
     -0.00384 |       0.00000 |      3.28e-05 |       0.00030 |     

     -0.00162 |       0.00000 |       0.00112 |       0.00019 |       0.76519
     -0.00171 |       0.00000 |       0.00107 |       0.00019 |       0.76519
     -0.00177 |       0.00000 |       0.00102 |       0.00020 |       0.76523
Evaluating losses...
     -0.00180 |       0.00000 |       0.00100 |       0.00020 |       0.76514
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.29          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6484          |
| TimeElapsed     | 2.64e+04      |
| TimestepsSoFar  | 17461248      |
| ev_tdlam_before | 0.194         |
| loss_ent        | 0.7651356     |
| loss_kl         | 0.00020141076 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0017976225 |
| loss_vf_loss    | 0.0009959567  |
-----------------------------------
********** Iteration 4263 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00031 |       0.00000 | 

********** Iteration 4268 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00023 |       0.00000 |      4.82e-05 |      7.67e-05 |       0.83210
     -0.00125 |       0.00000 |      4.15e-05 |       0.00011 |       0.83205
     -0.00165 |       0.00000 |      3.66e-05 |       0.00013 |       0.83237
     -0.00194 |       0.00000 |      3.33e-05 |       0.00013 |       0.83246
     -0.00221 |       0.00000 |      3.10e-05 |       0.00016 |       0.83256
     -0.00228 |       0.00000 |      2.85e-05 |       0.00017 |       0.83251
     -0.00246 |       0.00000 |      2.69e-05 |       0.00018 |       0.83226
     -0.00263 |       0.00000 |      2.55e-05 |       0.00019 |       0.83247
     -0.00279 |       0.00000 |      2.42e-05 |       0.00019 |       0.83232
     -0.00286 |       0.00000 |      2.31e-05 |       0.00020 |       0.83228
Evaluating losses...
     -0.00318 |       0.00000 |      2.22e-05 |       0.00019 |     

     -0.00183 |       0.00000 |       0.00074 |       0.00015 |       0.77445
     -0.00186 |       0.00000 |       0.00069 |       0.00016 |       0.77425
     -0.00196 |       0.00000 |       0.00065 |       0.00017 |       0.77446
     -0.00201 |       0.00000 |       0.00061 |       0.00018 |       0.77435
Evaluating losses...
     -0.00209 |       0.00000 |       0.00058 |       0.00018 |       0.77460
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.28          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6499          |
| TimeElapsed     | 2.66e+04      |
| TimestepsSoFar  | 17506304      |
| ev_tdlam_before | 0.579         |
| loss_ent        | 0.7746015     |
| loss_kl         | 0.00018404779 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0020937102 |
| loss_vf_loss    | 0.0005809062  |
-----------------------------------
********** Iteration 4274 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 4279 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00015 |       0.00000 |       0.00515 |      6.21e-05 |       0.76873
     -0.00146 |       0.00000 |       0.00399 |       0.00013 |       0.76893
     -0.00199 |       0.00000 |       0.00376 |       0.00014 |       0.76861
     -0.00235 |       0.00000 |       0.00358 |       0.00015 |       0.76869
     -0.00254 |       0.00000 |       0.00342 |       0.00017 |       0.76908
     -0.00280 |       0.00000 |       0.00327 |       0.00019 |       0.76893
     -0.00287 |       0.00000 |       0.00313 |       0.00021 |       0.76896
     -0.00297 |       0.00000 |       0.00302 |       0.00023 |       0.76867
     -0.00310 |       0.00000 |       0.00290 |       0.00023 |       0.76888
     -0.00325 |       0.00000 |       0.00283 |       0.00024 |       0.76892
Evaluating losses...
     -0.00350 |       0.00000 |       0.00277 |       0.00024 |     

     -0.00175 |       0.00000 |       0.00146 |       0.00019 |       0.74905
     -0.00181 |       0.00000 |       0.00146 |       0.00021 |       0.74901
     -0.00186 |       0.00000 |       0.00145 |       0.00020 |       0.74931
Evaluating losses...
     -0.00194 |       0.00000 |       0.00144 |       0.00020 |       0.74914
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.36          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6514          |
| TimeElapsed     | 2.66e+04      |
| TimestepsSoFar  | 17551360      |
| ev_tdlam_before | 0.583         |
| loss_ent        | 0.7491356     |
| loss_kl         | 0.00020117984 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.001941127  |
| loss_vf_loss    | 0.0014439032  |
-----------------------------------
********** Iteration 4285 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00054 |       0.00000 | 

********** Iteration 4290 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00027 |       0.00000 |       0.00032 |      4.63e-05 |       0.75099
     -0.00116 |       0.00000 |       0.00025 |      9.80e-05 |       0.75151
     -0.00133 |       0.00000 |       0.00021 |       0.00011 |       0.75171
     -0.00171 |       0.00000 |       0.00019 |       0.00013 |       0.75158
     -0.00217 |       0.00000 |       0.00017 |       0.00013 |       0.75169
     -0.00216 |       0.00000 |       0.00016 |       0.00015 |       0.75181
     -0.00241 |       0.00000 |       0.00015 |       0.00015 |       0.75171
     -0.00257 |       0.00000 |       0.00014 |       0.00016 |       0.75132
     -0.00270 |       0.00000 |       0.00014 |       0.00018 |       0.75152
     -0.00270 |       0.00000 |       0.00013 |       0.00019 |       0.75127
Evaluating losses...
     -0.00304 |       0.00000 |       0.00013 |       0.00018 |     

     -0.00286 |       0.00000 |       0.00120 |       0.00023 |       0.83367
     -0.00298 |       0.00000 |       0.00118 |       0.00024 |       0.83373
     -0.00306 |       0.00000 |       0.00116 |       0.00025 |       0.83383
Evaluating losses...
     -0.00328 |       0.00000 |       0.00114 |       0.00026 |       0.83381
-----------------------------------
| EpLenMean       | 3e+03         |
| EpRewMean       | 0.42          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6529          |
| TimeElapsed     | 2.67e+04      |
| TimestepsSoFar  | 17596416      |
| ev_tdlam_before | 0.56          |
| loss_ent        | 0.8338104     |
| loss_kl         | 0.0002607902  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0032764704 |
| loss_vf_loss    | 0.0011414603  |
-----------------------------------
********** Iteration 4296 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00049 |       0.00000 | 

********** Iteration 4301 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00027 |       0.00000 |       0.00014 |      4.43e-05 |       0.78070
     -0.00114 |       0.00000 |       0.00011 |       0.00012 |       0.78008
     -0.00175 |       0.00000 |       0.00011 |       0.00013 |       0.78012
     -0.00217 |       0.00000 |      9.89e-05 |       0.00015 |       0.78004
     -0.00240 |       0.00000 |      9.36e-05 |       0.00015 |       0.78032
     -0.00264 |       0.00000 |      8.94e-05 |       0.00017 |       0.78016
     -0.00288 |       0.00000 |      8.52e-05 |       0.00017 |       0.78025
     -0.00309 |       0.00000 |      8.19e-05 |       0.00018 |       0.78032
     -0.00323 |       0.00000 |      7.88e-05 |       0.00021 |       0.78010
     -0.00336 |       0.00000 |      7.64e-05 |       0.00021 |       0.78026
Evaluating losses...
     -0.00379 |       0.00000 |      7.48e-05 |       0.00023 |     

     -0.00216 |       0.00000 |      3.77e-05 |       0.00021 |       0.75318
     -0.00222 |       0.00000 |      3.61e-05 |       0.00021 |       0.75329
     -0.00230 |       0.00000 |      3.43e-05 |       0.00024 |       0.75348
Evaluating losses...
     -0.00251 |       0.00000 |      3.32e-05 |       0.00023 |       0.75345
-----------------------------------
| EpLenMean       | 3e+03         |
| EpRewMean       | 0.4           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6544          |
| TimeElapsed     | 2.67e+04      |
| TimestepsSoFar  | 17641472      |
| ev_tdlam_before | -1.16         |
| loss_ent        | 0.7534548     |
| loss_kl         | 0.00022678565 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0025134531 |
| loss_vf_loss    | 3.3202152e-05 |
-----------------------------------
********** Iteration 4307 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00090 |       0.00000 | 

********** Iteration 4312 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 |       0.00246 |      4.77e-05 |       0.75122
     -0.00090 |       0.00000 |       0.00213 |       0.00013 |       0.75062
     -0.00108 |       0.00000 |       0.00191 |       0.00013 |       0.75064
     -0.00121 |       0.00000 |       0.00175 |       0.00013 |       0.74991
     -0.00139 |       0.00000 |       0.00162 |       0.00012 |       0.75067
     -0.00154 |       0.00000 |       0.00153 |       0.00013 |       0.75067
     -0.00160 |       0.00000 |       0.00144 |       0.00015 |       0.75120
     -0.00181 |       0.00000 |       0.00137 |       0.00017 |       0.75089
     -0.00175 |       0.00000 |       0.00130 |       0.00020 |       0.75153
     -0.00196 |       0.00000 |       0.00125 |       0.00016 |       0.75122
Evaluating losses...
     -0.00209 |       0.00000 |       0.00122 |       0.00015 |     

     -0.00332 |       0.00000 |      5.19e-05 |       0.00025 |       0.80584
     -0.00344 |       0.00000 |      4.85e-05 |       0.00026 |       0.80600
     -0.00355 |       0.00000 |      4.59e-05 |       0.00026 |       0.80577
Evaluating losses...
     -0.00391 |       0.00000 |      4.44e-05 |       0.00030 |       0.80611
-----------------------------------
| EpLenMean       | 3e+03         |
| EpRewMean       | 0.47          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6559          |
| TimeElapsed     | 2.67e+04      |
| TimestepsSoFar  | 17686528      |
| ev_tdlam_before | -0.388        |
| loss_ent        | 0.8061076     |
| loss_kl         | 0.00029819214 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0039073406 |
| loss_vf_loss    | 4.4365224e-05 |
-----------------------------------
********** Iteration 4318 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00026 |       0.00000 | 

********** Iteration 4323 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |       0.00000 |       0.00256 |      2.44e-05 |       0.79555
     -0.00063 |       0.00000 |       0.00226 |      6.11e-05 |       0.79606
     -0.00089 |       0.00000 |       0.00213 |      6.02e-05 |       0.79593
     -0.00115 |       0.00000 |       0.00204 |      7.74e-05 |       0.79569
     -0.00136 |       0.00000 |       0.00198 |      9.60e-05 |       0.79614
     -0.00153 |       0.00000 |       0.00194 |       0.00010 |       0.79563
     -0.00157 |       0.00000 |       0.00192 |       0.00011 |       0.79602
     -0.00165 |       0.00000 |       0.00188 |       0.00012 |       0.79578
     -0.00177 |       0.00000 |       0.00186 |       0.00013 |       0.79592
     -0.00186 |       0.00000 |       0.00184 |       0.00014 |       0.79598
Evaluating losses...
     -0.00200 |       0.00000 |       0.00181 |       0.00015 |     

     -0.00234 |       0.00000 |       0.00098 |       0.00014 |       0.75236
     -0.00243 |       0.00000 |       0.00095 |       0.00015 |       0.75268
     -0.00251 |       0.00000 |       0.00092 |       0.00016 |       0.75238
Evaluating losses...
     -0.00269 |       0.00000 |       0.00090 |       0.00015 |       0.75276
-----------------------------------
| EpLenMean       | 3e+03         |
| EpRewMean       | 0.36          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6574          |
| TimeElapsed     | 2.68e+04      |
| TimestepsSoFar  | 17731584      |
| ev_tdlam_before | 0.144         |
| loss_ent        | 0.7527581     |
| loss_kl         | 0.0001518851  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.002687203  |
| loss_vf_loss    | 0.00089915213 |
-----------------------------------
********** Iteration 4329 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00056 |       0.00000 | 

********** Iteration 4334 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00038 |       0.00000 |      3.41e-05 |      3.94e-05 |       0.81024
     -0.00072 |       0.00000 |      2.71e-05 |      6.18e-05 |       0.81036
     -0.00113 |       0.00000 |      2.30e-05 |      7.90e-05 |       0.81029
     -0.00140 |       0.00000 |      2.06e-05 |      8.05e-05 |       0.81032
     -0.00158 |       0.00000 |      1.84e-05 |       0.00010 |       0.80984
     -0.00169 |       0.00000 |      1.68e-05 |       0.00012 |       0.81035
     -0.00189 |       0.00000 |      1.56e-05 |       0.00014 |       0.80995
     -0.00198 |       0.00000 |      1.46e-05 |       0.00014 |       0.80995
     -0.00211 |       0.00000 |      1.40e-05 |       0.00014 |       0.81014
     -0.00216 |       0.00000 |      1.34e-05 |       0.00017 |       0.81005
Evaluating losses...
     -0.00241 |       0.00000 |      1.29e-05 |       0.00016 |     

     -0.00239 |       0.00000 |       0.00093 |       0.00014 |       0.79566
     -0.00246 |       0.00000 |       0.00092 |       0.00016 |       0.79569
     -0.00252 |       0.00000 |       0.00090 |       0.00016 |       0.79586
Evaluating losses...
     -0.00268 |       0.00000 |       0.00087 |       0.00017 |       0.79584
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.32          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6588          |
| TimeElapsed     | 2.7e+04       |
| TimestepsSoFar  | 17776640      |
| ev_tdlam_before | 0.707         |
| loss_ent        | 0.79584384    |
| loss_kl         | 0.00016501932 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0026807208 |
| loss_vf_loss    | 0.0008748344  |
-----------------------------------
********** Iteration 4340 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00027 |       0.00000 | 

********** Iteration 4345 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00012 |       0.00000 |       0.00172 |      3.28e-05 |       0.78165
     -0.00115 |       0.00000 |       0.00142 |      8.12e-05 |       0.78091
     -0.00153 |       0.00000 |       0.00132 |      6.71e-05 |       0.78077
     -0.00171 |       0.00000 |       0.00128 |      8.32e-05 |       0.78099
     -0.00187 |       0.00000 |       0.00125 |      9.91e-05 |       0.78066
     -0.00196 |       0.00000 |       0.00123 |       0.00012 |       0.78035
     -0.00209 |       0.00000 |       0.00123 |       0.00012 |       0.78054
     -0.00217 |       0.00000 |       0.00122 |       0.00013 |       0.78044
     -0.00222 |       0.00000 |       0.00121 |       0.00015 |       0.78043
     -0.00228 |       0.00000 |       0.00120 |       0.00015 |       0.78054
Evaluating losses...
     -0.00239 |       0.00000 |       0.00119 |       0.00016 |     

     -0.00151 |       0.00000 |       0.00231 |       0.00021 |       0.77018
     -0.00162 |       0.00000 |       0.00230 |       0.00022 |       0.76989
     -0.00158 |       0.00000 |       0.00228 |       0.00022 |       0.76979
Evaluating losses...
     -0.00166 |       0.00000 |       0.00225 |       0.00020 |       0.77024
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.36         |
| EpThisIter      | 1            |
| EpisodesSoFar   | 6603         |
| TimeElapsed     | 2.7e+04      |
| TimestepsSoFar  | 17821696     |
| ev_tdlam_before | 0.288        |
| loss_ent        | 0.77024186   |
| loss_kl         | 0.0002022347 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.001656964 |
| loss_vf_loss    | 0.0022532095 |
----------------------------------
********** Iteration 4351 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00085 |       0.00000 |      2.87e-05 

********** Iteration 4356 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.88e-06 |       0.00000 |       0.00133 |      7.94e-05 |       0.78478
     -0.00102 |       0.00000 |       0.00121 |      1.00e-04 |       0.78468
     -0.00134 |       0.00000 |       0.00119 |       0.00010 |       0.78421
     -0.00139 |       0.00000 |       0.00117 |       0.00013 |       0.78407
     -0.00147 |       0.00000 |       0.00117 |       0.00013 |       0.78436
     -0.00162 |       0.00000 |       0.00116 |       0.00013 |       0.78421
     -0.00173 |       0.00000 |       0.00116 |       0.00013 |       0.78422
     -0.00164 |       0.00000 |       0.00115 |       0.00014 |       0.78464
     -0.00177 |       0.00000 |       0.00115 |       0.00015 |       0.78432
     -0.00180 |       0.00000 |       0.00115 |       0.00016 |       0.78431
Evaluating losses...
     -0.00193 |       0.00000 |       0.00114 |       0.00016 |     

     -0.00248 |       0.00000 |      2.31e-05 |       0.00018 |       0.76096
     -0.00269 |       0.00000 |      2.20e-05 |       0.00018 |       0.76097
     -0.00269 |       0.00000 |      2.10e-05 |       0.00021 |       0.76126
     -0.00288 |       0.00000 |      2.02e-05 |       0.00020 |       0.76109
Evaluating losses...
     -0.00306 |       0.00000 |      1.98e-05 |       0.00024 |       0.76038
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.23          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6618          |
| TimeElapsed     | 2.7e+04       |
| TimestepsSoFar  | 17866752      |
| ev_tdlam_before | -0.681        |
| loss_ent        | 0.76037794    |
| loss_kl         | 0.00023637636 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0030581441 |
| loss_vf_loss    | 1.9808298e-05 |
-----------------------------------
********** Iteration 4362 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 4367 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00025 |       0.00000 |       0.00378 |       0.00019 |       0.75357
     -0.00079 |       0.00000 |       0.00368 |       0.00017 |       0.75372
     -0.00112 |       0.00000 |       0.00362 |       0.00015 |       0.75297
     -0.00128 |       0.00000 |       0.00357 |       0.00015 |       0.75286
     -0.00141 |       0.00000 |       0.00352 |       0.00014 |       0.75268
     -0.00158 |       0.00000 |       0.00348 |       0.00013 |       0.75222
     -0.00159 |       0.00000 |       0.00343 |       0.00015 |       0.75225
     -0.00164 |       0.00000 |       0.00339 |       0.00016 |       0.75204
     -0.00163 |       0.00000 |       0.00336 |       0.00020 |       0.75241
     -0.00172 |       0.00000 |       0.00332 |       0.00016 |       0.75208
Evaluating losses...
     -0.00188 |       0.00000 |       0.00327 |       0.00017 |     

     -0.00262 |       0.00000 |      7.93e-05 |       0.00015 |       0.76463
     -0.00276 |       0.00000 |      7.67e-05 |       0.00017 |       0.76458
     -0.00300 |       0.00000 |      7.41e-05 |       0.00017 |       0.76437
     -0.00295 |       0.00000 |      7.23e-05 |       0.00017 |       0.76466
Evaluating losses...
     -0.00344 |       0.00000 |      7.08e-05 |       0.00017 |       0.76466
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.24          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6633          |
| TimeElapsed     | 2.7e+04       |
| TimestepsSoFar  | 17911808      |
| ev_tdlam_before | -0.0426       |
| loss_ent        | 0.7646621     |
| loss_kl         | 0.00017402216 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0034427443 |
| loss_vf_loss    | 7.0809234e-05 |
-----------------------------------
********** Iteration 4373 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 4378 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -5.29e-05 |       0.00000 |       0.00307 |      4.01e-05 |       0.77023
     -0.00084 |       0.00000 |       0.00262 |      9.83e-05 |       0.77020
     -0.00123 |       0.00000 |       0.00252 |      8.38e-05 |       0.76988
     -0.00132 |       0.00000 |       0.00245 |       0.00011 |       0.76976
     -0.00148 |       0.00000 |       0.00243 |       0.00011 |       0.76994
     -0.00163 |       0.00000 |       0.00239 |       0.00012 |       0.77007
     -0.00172 |       0.00000 |       0.00237 |       0.00013 |       0.77010
     -0.00178 |       0.00000 |       0.00235 |       0.00013 |       0.77012
     -0.00182 |       0.00000 |       0.00232 |       0.00014 |       0.77003
     -0.00187 |       0.00000 |       0.00229 |       0.00015 |       0.77012
Evaluating losses...
     -0.00199 |       0.00000 |       0.00227 |       0.00015 |     

     -0.00137 |       0.00000 |       0.00172 |       0.00011 |       0.76903
     -0.00147 |       0.00000 |       0.00170 |       0.00011 |       0.76907
     -0.00149 |       0.00000 |       0.00168 |       0.00013 |       0.76889
Evaluating losses...
     -0.00168 |       0.00000 |       0.00167 |       0.00013 |       0.76861
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.23          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6648          |
| TimeElapsed     | 2.71e+04      |
| TimestepsSoFar  | 17956864      |
| ev_tdlam_before | 0.314         |
| loss_ent        | 0.76860744    |
| loss_kl         | 0.000125352   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0016830768 |
| loss_vf_loss    | 0.0016676117  |
-----------------------------------
********** Iteration 4384 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     1.74e-05 |       0.00000 | 

********** Iteration 4389 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00043 |       0.00000 |       0.00753 |      4.09e-05 |       0.76453
     -0.00071 |       0.00000 |       0.00664 |      5.06e-05 |       0.76401
     -0.00118 |       0.00000 |       0.00606 |      5.96e-05 |       0.76377
     -0.00137 |       0.00000 |       0.00556 |      8.96e-05 |       0.76346
     -0.00158 |       0.00000 |       0.00513 |       0.00010 |       0.76359
     -0.00156 |       0.00000 |       0.00478 |       0.00010 |       0.76336
     -0.00177 |       0.00000 |       0.00449 |       0.00012 |       0.76361
     -0.00182 |       0.00000 |       0.00424 |       0.00013 |       0.76381
     -0.00187 |       0.00000 |       0.00406 |       0.00013 |       0.76338
     -0.00198 |       0.00000 |       0.00392 |       0.00015 |       0.76345
Evaluating losses...
     -0.00219 |       0.00000 |       0.00382 |       0.00014 |     

     -0.00161 |       0.00000 |       0.00015 |      9.12e-05 |       0.78706
     -0.00170 |       0.00000 |       0.00013 |       0.00010 |       0.78682
     -0.00178 |       0.00000 |       0.00012 |      9.97e-05 |       0.78693
     -0.00185 |       0.00000 |       0.00011 |       0.00011 |       0.78699
Evaluating losses...
     -0.00203 |       0.00000 |       0.00011 |       0.00011 |       0.78692
------------------------------------
| EpLenMean       | 3.02e+03       |
| EpRewMean       | 0.15           |
| EpThisIter      | 1              |
| EpisodesSoFar   | 6663           |
| TimeElapsed     | 2.73e+04       |
| TimestepsSoFar  | 18001920       |
| ev_tdlam_before | -2.27          |
| loss_ent        | 0.78691924     |
| loss_kl         | 0.00010611219  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.0020254545  |
| loss_vf_loss    | 0.000110287605 |
------------------------------------
********** Iteration 4395 ************
Optimizing...
     pol_surr |   

********** Iteration 4400 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00027 |       0.00000 |       0.00010 |      4.99e-05 |       0.76911
     -0.00108 |       0.00000 |      9.02e-05 |      8.26e-05 |       0.76849
     -0.00160 |       0.00000 |      8.41e-05 |      9.64e-05 |       0.76874
     -0.00180 |       0.00000 |      7.85e-05 |      9.80e-05 |       0.76858
     -0.00225 |       0.00000 |      7.41e-05 |       0.00011 |       0.76854
     -0.00235 |       0.00000 |      6.99e-05 |       0.00013 |       0.76872
     -0.00255 |       0.00000 |      6.69e-05 |       0.00013 |       0.76851
     -0.00272 |       0.00000 |      6.38e-05 |       0.00013 |       0.76860
     -0.00281 |       0.00000 |      6.10e-05 |       0.00013 |       0.76855
     -0.00277 |       0.00000 |      5.87e-05 |       0.00015 |       0.76857
Evaluating losses...
     -0.00311 |       0.00000 |      5.74e-05 |       0.00017 |     

     -0.00254 |       0.00000 |       0.00026 |       0.00013 |       0.77426
     -0.00266 |       0.00000 |       0.00024 |       0.00014 |       0.77443
     -0.00274 |       0.00000 |       0.00023 |       0.00015 |       0.77438
Evaluating losses...
     -0.00295 |       0.00000 |       0.00023 |       0.00015 |       0.77433
-----------------------------------
| EpLenMean       | 3.03e+03      |
| EpRewMean       | 0.2           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6678          |
| TimeElapsed     | 2.73e+04      |
| TimestepsSoFar  | 18046976      |
| ev_tdlam_before | -0.22         |
| loss_ent        | 0.7743296     |
| loss_kl         | 0.00015050652 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.002953495  |
| loss_vf_loss    | 0.00022509505 |
-----------------------------------
********** Iteration 4406 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00028 |       0.00000 | 

********** Iteration 4411 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 |       0.00277 |       0.00014 |       0.76133
     -0.00061 |       0.00000 |       0.00233 |       0.00011 |       0.76114
     -0.00088 |       0.00000 |       0.00216 |      7.80e-05 |       0.76132
     -0.00108 |       0.00000 |       0.00207 |      9.38e-05 |       0.76134
     -0.00125 |       0.00000 |       0.00204 |       0.00011 |       0.76177
     -0.00124 |       0.00000 |       0.00200 |       0.00012 |       0.76153
     -0.00126 |       0.00000 |       0.00200 |       0.00012 |       0.76147
     -0.00148 |       0.00000 |       0.00197 |       0.00013 |       0.76121
     -0.00153 |       0.00000 |       0.00196 |       0.00013 |       0.76149
     -0.00153 |       0.00000 |       0.00194 |       0.00015 |       0.76108
Evaluating losses...
     -0.00170 |       0.00000 |       0.00192 |       0.00015 |     

     -0.00237 |       0.00000 |      8.01e-05 |       0.00017 |       0.75708
     -0.00242 |       0.00000 |      7.76e-05 |       0.00017 |       0.75703
     -0.00254 |       0.00000 |      7.48e-05 |       0.00018 |       0.75704
Evaluating losses...
     -0.00284 |       0.00000 |      7.34e-05 |       0.00017 |       0.75701
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.21          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6693          |
| TimeElapsed     | 2.73e+04      |
| TimestepsSoFar  | 18092032      |
| ev_tdlam_before | -0.15         |
| loss_ent        | 0.7570142     |
| loss_kl         | 0.00016974646 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0028390214 |
| loss_vf_loss    | 7.341804e-05  |
-----------------------------------
********** Iteration 4417 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00019 |       0.00000 | 

********** Iteration 4422 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -4.82e-06 |       0.00000 |      3.60e-05 |      3.45e-05 |       0.77637
     -0.00072 |       0.00000 |      3.32e-05 |      4.85e-05 |       0.77677
     -0.00104 |       0.00000 |      3.10e-05 |      5.69e-05 |       0.77670
     -0.00127 |       0.00000 |      2.95e-05 |      7.03e-05 |       0.77677
     -0.00144 |       0.00000 |      2.77e-05 |      7.87e-05 |       0.77680
     -0.00159 |       0.00000 |      2.64e-05 |      8.21e-05 |       0.77681
     -0.00168 |       0.00000 |      2.52e-05 |      9.20e-05 |       0.77676
     -0.00181 |       0.00000 |      2.42e-05 |       0.00010 |       0.77691
     -0.00187 |       0.00000 |      2.31e-05 |       0.00011 |       0.77672
     -0.00195 |       0.00000 |      2.25e-05 |       0.00011 |       0.77701
Evaluating losses...
     -0.00220 |       0.00000 |      2.15e-05 |       0.00012 |     

     -0.00196 |       0.00000 |       0.00015 |       0.00011 |       0.75742
     -0.00190 |       0.00000 |       0.00015 |       0.00012 |       0.75729
     -0.00203 |       0.00000 |       0.00014 |       0.00013 |       0.75736
Evaluating losses...
     -0.00218 |       0.00000 |       0.00014 |       0.00013 |       0.75748
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.22          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6708          |
| TimeElapsed     | 2.74e+04      |
| TimestepsSoFar  | 18137088      |
| ev_tdlam_before | -2.51         |
| loss_ent        | 0.75747967    |
| loss_kl         | 0.00012642483 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0021773975 |
| loss_vf_loss    | 0.00013737603 |
-----------------------------------
********** Iteration 4428 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00029 |       0.00000 | 

********** Iteration 4433 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00020 |       0.00000 |      8.11e-05 |      4.21e-05 |       0.78646
     -0.00099 |       0.00000 |      7.15e-05 |      5.79e-05 |       0.78634
     -0.00144 |       0.00000 |      6.55e-05 |      6.71e-05 |       0.78637
     -0.00183 |       0.00000 |      6.06e-05 |      9.19e-05 |       0.78625
     -0.00198 |       0.00000 |      5.63e-05 |      9.47e-05 |       0.78662
     -0.00213 |       0.00000 |      5.32e-05 |       0.00011 |       0.78637
     -0.00242 |       0.00000 |      5.06e-05 |       0.00011 |       0.78667
     -0.00246 |       0.00000 |      4.83e-05 |       0.00012 |       0.78675
     -0.00254 |       0.00000 |      4.64e-05 |       0.00013 |       0.78674
     -0.00267 |       0.00000 |      4.48e-05 |       0.00014 |       0.78656
Evaluating losses...
     -0.00295 |       0.00000 |      4.38e-05 |       0.00014 |     

     -0.00205 |       0.00000 |      4.70e-05 |       0.00010 |       0.78772
     -0.00219 |       0.00000 |      4.48e-05 |       0.00011 |       0.78748
     -0.00229 |       0.00000 |      4.28e-05 |       0.00012 |       0.78765
Evaluating losses...
     -0.00250 |       0.00000 |      4.17e-05 |       0.00011 |       0.78734
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.22          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6723          |
| TimeElapsed     | 2.74e+04      |
| TimestepsSoFar  | 18182144      |
| ev_tdlam_before | -1            |
| loss_ent        | 0.7873388     |
| loss_kl         | 0.00011237948 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0025001746 |
| loss_vf_loss    | 4.1675667e-05 |
-----------------------------------
********** Iteration 4439 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     8.30e-05 |       0.00000 | 

********** Iteration 4444 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.63e-05 |       0.00000 |       0.00014 |      2.95e-05 |       0.77029
     -0.00081 |       0.00000 |       0.00011 |      5.42e-05 |       0.77019
     -0.00113 |       0.00000 |      9.30e-05 |      6.61e-05 |       0.77016
     -0.00134 |       0.00000 |      8.37e-05 |      7.59e-05 |       0.77016
     -0.00143 |       0.00000 |      7.68e-05 |       0.00011 |       0.77038
     -0.00159 |       0.00000 |      7.12e-05 |       0.00010 |       0.77004
     -0.00170 |       0.00000 |      6.69e-05 |       0.00011 |       0.76989
     -0.00177 |       0.00000 |      6.31e-05 |       0.00011 |       0.76997
     -0.00186 |       0.00000 |      5.97e-05 |       0.00011 |       0.76990
     -0.00191 |       0.00000 |      5.71e-05 |       0.00012 |       0.76999
Evaluating losses...
     -0.00209 |       0.00000 |      5.50e-05 |       0.00012 |     

     -0.00131 |       0.00000 |       0.00101 |      8.69e-05 |       0.76785
     -0.00138 |       0.00000 |       0.00100 |       0.00010 |       0.76797
     -0.00141 |       0.00000 |       0.00098 |       0.00011 |       0.76826
     -0.00153 |       0.00000 |       0.00096 |       0.00013 |       0.76806
Evaluating losses...
     -0.00164 |       0.00000 |       0.00095 |       0.00013 |       0.76830
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.21          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6738          |
| TimeElapsed     | 2.74e+04      |
| TimestepsSoFar  | 18227200      |
| ev_tdlam_before | 0.747         |
| loss_ent        | 0.7683039     |
| loss_kl         | 0.00012862746 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0016422116 |
| loss_vf_loss    | 0.00094837847 |
-----------------------------------
********** Iteration 4450 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 4455 ************
Eval num_timesteps=18247680, episode_reward=0.41 +/- 0.71
Episode length: 3000.00 +/- 0.00
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00021 |       0.00000 |       0.00108 |      1.84e-05 |       0.78981
     -0.00061 |       0.00000 |       0.00057 |      4.19e-05 |       0.79004
     -0.00087 |       0.00000 |       0.00047 |      4.67e-05 |       0.78980
     -0.00112 |       0.00000 |       0.00045 |      4.85e-05 |       0.78983
     -0.00113 |       0.00000 |       0.00044 |      5.54e-05 |       0.78975
     -0.00119 |       0.00000 |       0.00043 |      6.32e-05 |       0.78988
     -0.00138 |       0.00000 |       0.00042 |      6.35e-05 |       0.78975
     -0.00141 |       0.00000 |       0.00041 |      6.38e-05 |       0.78952
     -0.00146 |       0.00000 |       0.00041 |      6.97e-05 |       0.78994
     -0.00152 |       0.00000 |       0.00041 |      7.15e-05 |       0.7896

     -0.00232 |       0.00000 |      2.30e-05 |       0.00013 |       0.79791
     -0.00250 |       0.00000 |      2.25e-05 |       0.00014 |       0.79831
     -0.00265 |       0.00000 |      2.21e-05 |       0.00015 |       0.79791
     -0.00261 |       0.00000 |      2.17e-05 |       0.00016 |       0.79779
Evaluating losses...
     -0.00295 |       0.00000 |      2.15e-05 |       0.00015 |       0.79770
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.21          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6753          |
| TimeElapsed     | 2.76e+04      |
| TimestepsSoFar  | 18272256      |
| ev_tdlam_before | -1.03         |
| loss_ent        | 0.7976963     |
| loss_kl         | 0.00014598723 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0029547499 |
| loss_vf_loss    | 2.1525346e-05 |
-----------------------------------
********** Iteration 4461 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 4466 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     7.96e-06 |       0.00000 |       0.00036 |      2.66e-05 |       0.77670
     -0.00066 |       0.00000 |       0.00022 |      5.21e-05 |       0.77687
     -0.00089 |       0.00000 |       0.00020 |      7.75e-05 |       0.77691
     -0.00117 |       0.00000 |       0.00018 |      8.32e-05 |       0.77651
     -0.00125 |       0.00000 |       0.00017 |       0.00011 |       0.77655
     -0.00139 |       0.00000 |       0.00016 |       0.00011 |       0.77661
     -0.00148 |       0.00000 |       0.00015 |       0.00013 |       0.77659
     -0.00156 |       0.00000 |       0.00015 |       0.00014 |       0.77674
     -0.00162 |       0.00000 |       0.00015 |       0.00013 |       0.77658
     -0.00162 |       0.00000 |       0.00014 |       0.00015 |       0.77651
Evaluating losses...
     -0.00175 |       0.00000 |       0.00014 |       0.00016 |     

     -0.00107 |       0.00000 |       0.00320 |       0.00011 |       0.76378
     -0.00106 |       0.00000 |       0.00317 |       0.00012 |       0.76365
     -0.00110 |       0.00000 |       0.00314 |       0.00013 |       0.76363
Evaluating losses...
     -0.00121 |       0.00000 |       0.00311 |       0.00011 |       0.76371
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.24          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6768          |
| TimeElapsed     | 2.77e+04      |
| TimestepsSoFar  | 18317312      |
| ev_tdlam_before | 0.569         |
| loss_ent        | 0.76370895    |
| loss_kl         | 0.00011258879 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0012087    |
| loss_vf_loss    | 0.0031134519  |
-----------------------------------
********** Iteration 4472 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00041 |       0.00000 | 

********** Iteration 4477 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00028 |       0.00000 |      8.82e-05 |      4.87e-05 |       0.78149
     -0.00073 |       0.00000 |      6.93e-05 |      6.48e-05 |       0.78188
     -0.00119 |       0.00000 |      6.02e-05 |      8.31e-05 |       0.78194
     -0.00147 |       0.00000 |      5.47e-05 |      9.67e-05 |       0.78187
     -0.00162 |       0.00000 |      5.09e-05 |       0.00012 |       0.78170
     -0.00194 |       0.00000 |      4.85e-05 |       0.00011 |       0.78186
     -0.00201 |       0.00000 |      4.65e-05 |       0.00011 |       0.78204
     -0.00208 |       0.00000 |      4.49e-05 |       0.00012 |       0.78180
     -0.00224 |       0.00000 |      4.35e-05 |       0.00013 |       0.78187
     -0.00231 |       0.00000 |      4.22e-05 |       0.00013 |       0.78207
Evaluating losses...
     -0.00262 |       0.00000 |      4.19e-05 |       0.00012 |     

     -0.00200 |       0.00000 |       0.00012 |       0.00010 |       0.76679
     -0.00210 |       0.00000 |       0.00012 |       0.00011 |       0.76651
     -0.00221 |       0.00000 |       0.00011 |       0.00011 |       0.76656
Evaluating losses...
     -0.00240 |       0.00000 |       0.00011 |       0.00013 |       0.76614
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.28          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6783          |
| TimeElapsed     | 2.77e+04      |
| TimestepsSoFar  | 18362368      |
| ev_tdlam_before | -0.421        |
| loss_ent        | 0.76614285    |
| loss_kl         | 0.00012608859 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0024023056 |
| loss_vf_loss    | 0.00011161204 |
-----------------------------------
********** Iteration 4483 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00015 |       0.00000 | 

********** Iteration 4488 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00038 |       0.00000 |       0.00153 |      2.93e-05 |       0.75273
      0.00025 |       0.00000 |       0.00138 |       0.00040 |       0.75306
     -0.00059 |       0.00000 |       0.00132 |      5.22e-05 |       0.75270
     -0.00082 |       0.00000 |       0.00129 |      6.01e-05 |       0.75244
     -0.00099 |       0.00000 |       0.00126 |      5.72e-05 |       0.75255
     -0.00094 |       0.00000 |       0.00125 |       0.00013 |       0.75246
     -0.00109 |       0.00000 |       0.00124 |      8.30e-05 |       0.75251
     -0.00127 |       0.00000 |       0.00123 |      7.21e-05 |       0.75282
     -0.00119 |       0.00000 |       0.00122 |       0.00011 |       0.75266
     -0.00124 |       0.00000 |       0.00121 |       0.00011 |       0.75271
Evaluating losses...
     -0.00152 |       0.00000 |       0.00121 |      8.81e-05 |     

     -0.00112 |       0.00000 |       0.00052 |      8.04e-05 |       0.79548
     -0.00116 |       0.00000 |       0.00050 |      7.58e-05 |       0.79545
     -0.00122 |       0.00000 |       0.00049 |      8.54e-05 |       0.79524
Evaluating losses...
     -0.00135 |       0.00000 |       0.00048 |      8.05e-05 |       0.79541
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.33          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6798          |
| TimeElapsed     | 2.77e+04      |
| TimestepsSoFar  | 18407424      |
| ev_tdlam_before | -0.49         |
| loss_ent        | 0.7954132     |
| loss_kl         | 8.051282e-05  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0013501514 |
| loss_vf_loss    | 0.0004789254  |
-----------------------------------
********** Iteration 4494 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -3.12e-05 |       0.00000 | 

********** Iteration 4499 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00039 |       0.00000 |       0.00058 |      3.29e-05 |       0.80362
     -0.00067 |       0.00000 |       0.00046 |      3.73e-05 |       0.80385
     -0.00083 |       0.00000 |       0.00039 |      5.02e-05 |       0.80374
     -0.00109 |       0.00000 |       0.00035 |      5.99e-05 |       0.80413
     -0.00115 |       0.00000 |       0.00031 |      7.12e-05 |       0.80430
     -0.00130 |       0.00000 |       0.00028 |      7.33e-05 |       0.80398
     -0.00147 |       0.00000 |       0.00026 |      7.91e-05 |       0.80387
     -0.00142 |       0.00000 |       0.00025 |      8.03e-05 |       0.80396
     -0.00163 |       0.00000 |       0.00023 |      8.60e-05 |       0.80403
     -0.00161 |       0.00000 |       0.00022 |      9.52e-05 |       0.80397
Evaluating losses...
     -0.00189 |       0.00000 |       0.00021 |       0.00011 |     

     -0.00227 |       0.00000 |      5.49e-05 |      8.64e-05 |       0.83453
     -0.00234 |       0.00000 |      5.31e-05 |      9.03e-05 |       0.83460
     -0.00242 |       0.00000 |      5.18e-05 |      9.33e-05 |       0.83443
Evaluating losses...
     -0.00276 |       0.00000 |      5.06e-05 |      9.71e-05 |       0.83437
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.34          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6813          |
| TimeElapsed     | 2.78e+04      |
| TimestepsSoFar  | 18452480      |
| ev_tdlam_before | -1.14         |
| loss_ent        | 0.83437216    |
| loss_kl         | 9.7066615e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0027639414 |
| loss_vf_loss    | 5.0641018e-05 |
-----------------------------------
********** Iteration 4505 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00041 |       0.00000 | 

********** Iteration 4510 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00137 |       0.00000 |       0.00358 |       0.00021 |       0.77992
      0.00056 |       0.00000 |       0.00347 |       0.00023 |       0.77927
     -0.00092 |       0.00000 |       0.00341 |       0.00011 |       0.77929
     -0.00100 |       0.00000 |       0.00337 |       0.00011 |       0.77928
     -0.00112 |       0.00000 |       0.00333 |       0.00010 |       0.77935
     -0.00120 |       0.00000 |       0.00329 |       0.00012 |       0.77956
     -0.00093 |       0.00000 |       0.00327 |       0.00018 |       0.77946
     -0.00135 |       0.00000 |       0.00324 |       0.00015 |       0.77954
     -0.00150 |       0.00000 |       0.00323 |       0.00013 |       0.77941
     -0.00151 |       0.00000 |       0.00320 |       0.00013 |       0.77948
Evaluating losses...
     -0.00163 |       0.00000 |       0.00318 |       0.00013 |     

     -0.00172 |       0.00000 |      9.32e-05 |      7.87e-05 |       0.78648
     -0.00183 |       0.00000 |      9.00e-05 |      8.03e-05 |       0.78664
     -0.00178 |       0.00000 |      8.77e-05 |      8.94e-05 |       0.78688
Evaluating losses...
     -0.00197 |       0.00000 |      8.55e-05 |      8.68e-05 |       0.78649
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.36          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6828          |
| TimeElapsed     | 2.78e+04      |
| TimestepsSoFar  | 18497536      |
| ev_tdlam_before | -0.448        |
| loss_ent        | 0.78648955    |
| loss_kl         | 8.679372e-05  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.001966798  |
| loss_vf_loss    | 8.5475294e-05 |
-----------------------------------
********** Iteration 4516 ************
Eval num_timesteps=18497536, episode_reward=0.33 +/- 0.76
Episode length: 3000.00 +/- 0.00
Optimizing...
     pol_surr |    

********** Iteration 4521 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     4.91e-05 |       0.00000 |       0.00192 |      1.71e-05 |       0.77496
     -0.00040 |       0.00000 |       0.00189 |      5.44e-05 |       0.77503
     -0.00065 |       0.00000 |       0.00187 |      5.46e-05 |       0.77552
     -0.00078 |       0.00000 |       0.00185 |      6.38e-05 |       0.77570
     -0.00086 |       0.00000 |       0.00184 |      5.83e-05 |       0.77573
     -0.00097 |       0.00000 |       0.00181 |      5.38e-05 |       0.77568
     -0.00101 |       0.00000 |       0.00180 |      5.19e-05 |       0.77539
     -0.00105 |       0.00000 |       0.00178 |      5.49e-05 |       0.77542
     -0.00106 |       0.00000 |       0.00177 |      8.29e-05 |       0.77526
     -0.00110 |       0.00000 |       0.00176 |      7.13e-05 |       0.77545
Evaluating losses...
     -0.00117 |       0.00000 |       0.00176 |      7.27e-05 |     

     -0.00161 |       0.00000 |       0.00155 |      5.40e-05 |       0.76151
     -0.00163 |       0.00000 |       0.00150 |      5.76e-05 |       0.76167
     -0.00172 |       0.00000 |       0.00146 |      6.24e-05 |       0.76181
Evaluating losses...
     -0.00185 |       0.00000 |       0.00143 |      6.29e-05 |       0.76175
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.42          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6843          |
| TimeElapsed     | 2.8e+04       |
| TimestepsSoFar  | 18542592      |
| ev_tdlam_before | 0.677         |
| loss_ent        | 0.7617523     |
| loss_kl         | 6.2944215e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0018458238 |
| loss_vf_loss    | 0.0014313796  |
-----------------------------------
********** Iteration 4527 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     5.34e-05 |       0.00000 | 

********** Iteration 4532 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00109 |       0.00000 |       0.00152 |       0.00014 |       0.76870
     -0.00026 |       0.00000 |       0.00148 |      9.06e-05 |       0.76834
     -0.00033 |       0.00000 |       0.00145 |      6.43e-05 |       0.76787
     -0.00034 |       0.00000 |       0.00143 |       0.00012 |       0.76836
     -0.00078 |       0.00000 |       0.00141 |      4.92e-05 |       0.76772
     -0.00096 |       0.00000 |       0.00140 |      4.73e-05 |       0.76791
     -0.00107 |       0.00000 |       0.00139 |      4.96e-05 |       0.76799
     -0.00088 |       0.00000 |       0.00137 |      6.48e-05 |       0.76793
     -0.00090 |       0.00000 |       0.00137 |      7.20e-05 |       0.76790
     -0.00111 |       0.00000 |       0.00136 |      6.45e-05 |       0.76762
Evaluating losses...
     -0.00125 |       0.00000 |       0.00135 |      7.53e-05 |     

     -0.00081 |       0.00000 |       0.00063 |      5.76e-05 |       0.76359
     -0.00086 |       0.00000 |       0.00062 |      5.56e-05 |       0.76348
     -0.00087 |       0.00000 |       0.00061 |      6.21e-05 |       0.76364
Evaluating losses...
     -0.00097 |       0.00000 |       0.00060 |      5.93e-05 |       0.76364
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.4           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6858          |
| TimeElapsed     | 2.8e+04       |
| TimestepsSoFar  | 18587648      |
| ev_tdlam_before | 0.671         |
| loss_ent        | 0.76363575    |
| loss_kl         | 5.9299753e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0009700705 |
| loss_vf_loss    | 0.000604691   |
-----------------------------------
********** Iteration 4538 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00037 |       0.00000 | 

********** Iteration 4543 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -4.60e-07 |       0.00000 |       0.00023 |      1.72e-05 |       0.79449
     -0.00076 |       0.00000 |       0.00018 |      4.68e-05 |       0.79444
     -0.00111 |       0.00000 |       0.00015 |      4.58e-05 |       0.79466
     -0.00127 |       0.00000 |       0.00013 |      6.18e-05 |       0.79460
     -0.00150 |       0.00000 |       0.00012 |      6.01e-05 |       0.79454
     -0.00157 |       0.00000 |       0.00011 |      6.30e-05 |       0.79448
     -0.00170 |       0.00000 |       0.00011 |      6.96e-05 |       0.79458
     -0.00178 |       0.00000 |       0.00010 |      7.62e-05 |       0.79462
     -0.00184 |       0.00000 |      9.89e-05 |      8.16e-05 |       0.79456
     -0.00191 |       0.00000 |      9.61e-05 |      8.11e-05 |       0.79451
Evaluating losses...
     -0.00207 |       0.00000 |      9.43e-05 |      8.09e-05 |     

     -0.00070 |       0.00000 |       0.00093 |      4.77e-05 |       0.74193
     -0.00091 |       0.00000 |       0.00092 |      4.85e-05 |       0.74189
     -0.00088 |       0.00000 |       0.00091 |      5.63e-05 |       0.74203
Evaluating losses...
     -0.00102 |       0.00000 |       0.00090 |      6.24e-05 |       0.74217
----------------------------------
| EpLenMean       | 3.01e+03     |
| EpRewMean       | 0.42         |
| EpThisIter      | 2            |
| EpisodesSoFar   | 6873         |
| TimeElapsed     | 2.8e+04      |
| TimestepsSoFar  | 18632704     |
| ev_tdlam_before | 0.822        |
| loss_ent        | 0.742174     |
| loss_kl         | 6.240382e-05 |
| loss_pol_entpen | 0.0          |
| loss_pol_surr   | -0.001024303 |
| loss_vf_loss    | 0.0009022609 |
----------------------------------
********** Iteration 4549 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     9.14e-05 |       0.00000 |       0.00123 

********** Iteration 4554 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00015 |       0.00000 |       0.00204 |       0.00014 |       0.79496
     -0.00051 |       0.00000 |       0.00185 |      8.72e-05 |       0.79559
     -0.00079 |       0.00000 |       0.00175 |      6.96e-05 |       0.79554
     -0.00092 |       0.00000 |       0.00169 |      6.49e-05 |       0.79547
     -0.00107 |       0.00000 |       0.00165 |      6.07e-05 |       0.79550
     -0.00115 |       0.00000 |       0.00161 |      4.67e-05 |       0.79576
     -0.00111 |       0.00000 |       0.00158 |      6.34e-05 |       0.79554
     -0.00122 |       0.00000 |       0.00156 |      5.95e-05 |       0.79563
     -0.00119 |       0.00000 |       0.00154 |      6.18e-05 |       0.79565
     -0.00132 |       0.00000 |       0.00152 |      6.24e-05 |       0.79559
Evaluating losses...
     -0.00140 |       0.00000 |       0.00150 |      6.37e-05 |     

     -0.00132 |       0.00000 |       0.00011 |      4.82e-05 |       0.78638
     -0.00139 |       0.00000 |       0.00011 |      5.18e-05 |       0.78645
     -0.00144 |       0.00000 |       0.00010 |      5.68e-05 |       0.78642
Evaluating losses...
     -0.00162 |       0.00000 |       0.00010 |      5.69e-05 |       0.78651
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.39          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6888          |
| TimeElapsed     | 2.81e+04      |
| TimestepsSoFar  | 18677760      |
| ev_tdlam_before | -0.0458       |
| loss_ent        | 0.7865086     |
| loss_kl         | 5.6875127e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.001622665  |
| loss_vf_loss    | 0.00010056922 |
-----------------------------------
********** Iteration 4560 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -2.87e-05 |       0.00000 | 

********** Iteration 4565 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 |       0.00011 |      2.30e-05 |       0.76136
     -0.00039 |       0.00000 |      9.90e-05 |      2.79e-05 |       0.76165
     -0.00063 |       0.00000 |      9.09e-05 |      3.70e-05 |       0.76174
     -0.00088 |       0.00000 |      8.44e-05 |      4.05e-05 |       0.76191
     -0.00091 |       0.00000 |      7.89e-05 |      4.07e-05 |       0.76169
     -0.00110 |       0.00000 |      7.36e-05 |      4.37e-05 |       0.76166
     -0.00111 |       0.00000 |      6.93e-05 |      4.53e-05 |       0.76171
     -0.00123 |       0.00000 |      6.53e-05 |      6.24e-05 |       0.76197
     -0.00128 |       0.00000 |      6.17e-05 |      5.40e-05 |       0.76166
     -0.00133 |       0.00000 |      5.83e-05 |      5.97e-05 |       0.76184
Evaluating losses...
     -0.00139 |       0.00000 |      5.65e-05 |      7.68e-05 |     

     -0.00124 |       0.00000 |       0.00013 |      3.98e-05 |       0.76642
     -0.00125 |       0.00000 |       0.00013 |      4.24e-05 |       0.76644
     -0.00126 |       0.00000 |       0.00012 |      4.63e-05 |       0.76628
Evaluating losses...
     -0.00149 |       0.00000 |       0.00012 |      4.79e-05 |       0.76629
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.3            |
| EpThisIter      | 2              |
| EpisodesSoFar   | 6903           |
| TimeElapsed     | 2.81e+04       |
| TimestepsSoFar  | 18722816       |
| ev_tdlam_before | -0.775         |
| loss_ent        | 0.7662869      |
| loss_kl         | 4.7937672e-05  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.001489165   |
| loss_vf_loss    | 0.000116955634 |
------------------------------------
********** Iteration 4571 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -8.34e-05 |   

********** Iteration 4576 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00022 |       0.00000 |       0.00348 |      3.62e-05 |       0.75689
     -0.00028 |       0.00000 |       0.00336 |      5.00e-05 |       0.75657
     -0.00048 |       0.00000 |       0.00325 |      4.39e-05 |       0.75673
     -0.00044 |       0.00000 |       0.00312 |      4.67e-05 |       0.75677
     -0.00058 |       0.00000 |       0.00299 |      7.01e-05 |       0.75633
     -0.00071 |       0.00000 |       0.00286 |      5.85e-05 |       0.75647
     -0.00072 |       0.00000 |       0.00274 |      6.13e-05 |       0.75619
     -0.00081 |       0.00000 |       0.00263 |      6.49e-05 |       0.75634
     -0.00086 |       0.00000 |       0.00253 |      6.69e-05 |       0.75627
     -0.00082 |       0.00000 |       0.00244 |      7.13e-05 |       0.75608
Evaluating losses...
     -0.00089 |       0.00000 |       0.00239 |      7.95e-05 |     

     -0.00059 |       0.00000 |       0.00106 |      3.83e-05 |       0.82080
     -0.00057 |       0.00000 |       0.00103 |      4.26e-05 |       0.82077
     -0.00075 |       0.00000 |       0.00101 |      4.57e-05 |       0.82067
     -0.00076 |       0.00000 |       0.00099 |      4.13e-05 |       0.82089
     -0.00080 |       0.00000 |       0.00099 |      4.53e-05 |       0.82083
Evaluating losses...
     -0.00087 |       0.00000 |       0.00098 |      4.93e-05 |       0.82099
-----------------------------------
| EpLenMean       | 3.02e+03      |
| EpRewMean       | 0.29          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6917          |
| TimeElapsed     | 2.83e+04      |
| TimestepsSoFar  | 18767872      |
| ev_tdlam_before | 0.58          |
| loss_ent        | 0.82099456    |
| loss_kl         | 4.9340637e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0008723717 |
| loss_vf_loss    | 0.0009757474  |
-----------------------------------
*******

********** Iteration 4587 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00031 |       0.00000 |       0.00237 |      1.97e-05 |       0.77037
     -0.00045 |       0.00000 |       0.00196 |      2.61e-05 |       0.77050
     -0.00069 |       0.00000 |       0.00173 |      2.71e-05 |       0.77049
     -0.00074 |       0.00000 |       0.00157 |      3.48e-05 |       0.77051
     -0.00078 |       0.00000 |       0.00143 |      4.52e-05 |       0.77052
     -0.00093 |       0.00000 |       0.00131 |      4.33e-05 |       0.77043
     -0.00106 |       0.00000 |       0.00121 |      4.48e-05 |       0.77038
     -0.00106 |       0.00000 |       0.00113 |      5.13e-05 |       0.77045
     -0.00111 |       0.00000 |       0.00106 |      5.24e-05 |       0.77035
     -0.00110 |       0.00000 |       0.00101 |      5.53e-05 |       0.77057
Evaluating losses...
     -0.00120 |       0.00000 |       0.00097 |      5.35e-05 |     

     -0.00124 |       0.00000 |       0.00433 |      5.24e-05 |       0.80343
     -0.00132 |       0.00000 |       0.00421 |      4.96e-05 |       0.80338
     -0.00137 |       0.00000 |       0.00411 |      5.96e-05 |       0.80349
Evaluating losses...
     -0.00150 |       0.00000 |       0.00404 |      5.37e-05 |       0.80316
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.3           |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6932          |
| TimeElapsed     | 2.84e+04      |
| TimestepsSoFar  | 18812928      |
| ev_tdlam_before | 0.511         |
| loss_ent        | 0.8031633     |
| loss_kl         | 5.3741613e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0014997228 |
| loss_vf_loss    | 0.0040445337  |
-----------------------------------
********** Iteration 4593 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -3.18e-05 |       0.00000 | 

********** Iteration 4598 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00026 |       0.00000 |       0.00421 |      1.48e-05 |       0.76425
     -0.00038 |       0.00000 |       0.00336 |      1.47e-05 |       0.76442
     -0.00069 |       0.00000 |       0.00290 |      1.94e-05 |       0.76411
     -0.00084 |       0.00000 |       0.00262 |      2.28e-05 |       0.76415
     -0.00085 |       0.00000 |       0.00244 |      2.73e-05 |       0.76420
     -0.00095 |       0.00000 |       0.00232 |      2.84e-05 |       0.76410
     -0.00099 |       0.00000 |       0.00223 |      3.30e-05 |       0.76412
     -0.00106 |       0.00000 |       0.00215 |      3.33e-05 |       0.76417
     -0.00111 |       0.00000 |       0.00208 |      3.56e-05 |       0.76418
     -0.00118 |       0.00000 |       0.00203 |      3.78e-05 |       0.76420
Evaluating losses...
     -0.00125 |       0.00000 |       0.00199 |      3.74e-05 |     

     -0.00096 |       0.00000 |       0.00157 |      3.67e-05 |       0.78574
     -0.00095 |       0.00000 |       0.00156 |      4.04e-05 |       0.78583
     -0.00101 |       0.00000 |       0.00154 |      4.04e-05 |       0.78579
     -0.00107 |       0.00000 |       0.00152 |      4.31e-05 |       0.78584
Evaluating losses...
     -0.00111 |       0.00000 |       0.00152 |      4.28e-05 |       0.78567
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.34          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 6947          |
| TimeElapsed     | 2.84e+04      |
| TimestepsSoFar  | 18857984      |
| ev_tdlam_before | 0.676         |
| loss_ent        | 0.7856724     |
| loss_kl         | 4.2795105e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0011057474 |
| loss_vf_loss    | 0.0015241324  |
-----------------------------------
********** Iteration 4604 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 4609 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00032 |       0.00000 |       0.00346 |      1.59e-05 |       0.76298
     -0.00067 |       0.00000 |       0.00327 |      2.95e-05 |       0.76262
     -0.00078 |       0.00000 |       0.00315 |      3.00e-05 |       0.76272
     -0.00093 |       0.00000 |       0.00307 |      3.13e-05 |       0.76272
     -0.00088 |       0.00000 |       0.00302 |      3.49e-05 |       0.76268
     -0.00102 |       0.00000 |       0.00299 |      3.34e-05 |       0.76278
     -0.00106 |       0.00000 |       0.00296 |      3.67e-05 |       0.76264
     -0.00110 |       0.00000 |       0.00293 |      3.94e-05 |       0.76269
     -0.00111 |       0.00000 |       0.00290 |      4.28e-05 |       0.76268
     -0.00117 |       0.00000 |       0.00289 |      4.51e-05 |       0.76277
Evaluating losses...
     -0.00122 |       0.00000 |       0.00286 |      4.95e-05 |     

     -0.00120 |       0.00000 |       0.00163 |      5.43e-05 |       0.79654
     -0.00117 |       0.00000 |       0.00161 |      5.64e-05 |       0.79658
     -0.00127 |       0.00000 |       0.00160 |      5.40e-05 |       0.79660
Evaluating losses...
     -0.00138 |       0.00000 |       0.00159 |      5.86e-05 |       0.79665
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.45          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6963          |
| TimeElapsed     | 2.85e+04      |
| TimestepsSoFar  | 18903040      |
| ev_tdlam_before | 0.677         |
| loss_ent        | 0.796652      |
| loss_kl         | 5.864948e-05  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0013828637 |
| loss_vf_loss    | 0.0015856453  |
-----------------------------------
********** Iteration 4615 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00033 |       0.00000 | 

********** Iteration 4620 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00025 |       0.00000 |       0.00189 |      6.96e-05 |       0.76467
     -0.00019 |       0.00000 |       0.00174 |      4.39e-05 |       0.76444
     -0.00024 |       0.00000 |       0.00163 |      2.78e-05 |       0.76472
     -0.00051 |       0.00000 |       0.00155 |      2.43e-05 |       0.76459
     -0.00060 |       0.00000 |       0.00148 |      2.39e-05 |       0.76464
     -0.00058 |       0.00000 |       0.00141 |      2.56e-05 |       0.76471
     -0.00064 |       0.00000 |       0.00135 |      2.80e-05 |       0.76457
     -0.00054 |       0.00000 |       0.00129 |      3.95e-05 |       0.76487
     -0.00049 |       0.00000 |       0.00125 |      5.72e-05 |       0.76438
     -0.00077 |       0.00000 |       0.00120 |      3.37e-05 |       0.76462
Evaluating losses...
     -0.00080 |       0.00000 |       0.00116 |      3.36e-05 |     

     -0.00084 |       0.00000 |       0.00266 |      2.91e-05 |       0.75852
     -0.00086 |       0.00000 |       0.00264 |      3.23e-05 |       0.75840
     -0.00089 |       0.00000 |       0.00261 |      3.49e-05 |       0.75852
Evaluating losses...
     -0.00091 |       0.00000 |       0.00260 |      3.84e-05 |       0.75832
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.48          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6978          |
| TimeElapsed     | 2.85e+04      |
| TimestepsSoFar  | 18948096      |
| ev_tdlam_before | 0.52          |
| loss_ent        | 0.75831676    |
| loss_kl         | 3.839287e-05  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0009062486 |
| loss_vf_loss    | 0.0026049046  |
-----------------------------------
********** Iteration 4626 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.19e-05 |       0.00000 | 

********** Iteration 4631 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00026 |       0.00000 |       0.00065 |      1.07e-05 |       0.75514
     -0.00039 |       0.00000 |       0.00052 |      2.06e-05 |       0.75499
     -0.00071 |       0.00000 |       0.00044 |      2.16e-05 |       0.75520
     -0.00085 |       0.00000 |       0.00039 |      2.74e-05 |       0.75518
     -0.00099 |       0.00000 |       0.00036 |      3.06e-05 |       0.75537
     -0.00106 |       0.00000 |       0.00033 |      3.24e-05 |       0.75521
     -0.00106 |       0.00000 |       0.00031 |      3.89e-05 |       0.75530
     -0.00119 |       0.00000 |       0.00029 |      4.06e-05 |       0.75534
     -0.00125 |       0.00000 |       0.00028 |      4.16e-05 |       0.75523
     -0.00129 |       0.00000 |       0.00027 |      4.56e-05 |       0.75537
Evaluating losses...
     -0.00142 |       0.00000 |       0.00027 |      4.59e-05 |     

     -0.00122 |       0.00000 |      1.70e-05 |      3.87e-05 |       0.73726
     -0.00130 |       0.00000 |      1.65e-05 |      4.05e-05 |       0.73721
     -0.00133 |       0.00000 |      1.62e-05 |      4.28e-05 |       0.73730
Evaluating losses...
     -0.00150 |       0.00000 |      1.58e-05 |      4.46e-05 |       0.73698
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.46          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 6993          |
| TimeElapsed     | 2.86e+04      |
| TimestepsSoFar  | 18993152      |
| ev_tdlam_before | -1.38         |
| loss_ent        | 0.736982      |
| loss_kl         | 4.4605276e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0015013712 |
| loss_vf_loss    | 1.5825726e-05 |
-----------------------------------
********** Iteration 4637 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     7.75e-05 |       0.00000 | 

********** Iteration 4642 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00016 |       0.00000 |       0.00235 |      6.31e-06 |       0.72252
     -0.00018 |       0.00000 |       0.00225 |      1.33e-05 |       0.72258
     -0.00031 |       0.00000 |       0.00220 |      1.79e-05 |       0.72270
     -0.00038 |       0.00000 |       0.00215 |      1.75e-05 |       0.72259
     -0.00046 |       0.00000 |       0.00212 |      2.02e-05 |       0.72266
     -0.00048 |       0.00000 |       0.00209 |      2.37e-05 |       0.72284
     -0.00052 |       0.00000 |       0.00207 |      2.81e-05 |       0.72267
     -0.00055 |       0.00000 |       0.00204 |      2.72e-05 |       0.72282
     -0.00060 |       0.00000 |       0.00202 |      2.75e-05 |       0.72275
     -0.00061 |       0.00000 |       0.00200 |      3.11e-05 |       0.72276
Evaluating losses...
     -0.00064 |       0.00000 |       0.00199 |      3.13e-05 |     

     -0.00060 |       0.00000 |       0.00193 |      2.95e-05 |       0.76130
     -0.00065 |       0.00000 |       0.00190 |      2.71e-05 |       0.76132
     -0.00068 |       0.00000 |       0.00188 |      2.69e-05 |       0.76129
Evaluating losses...
     -0.00070 |       0.00000 |       0.00185 |      2.81e-05 |       0.76115
------------------------------------
| EpLenMean       | 3.02e+03       |
| EpRewMean       | 0.47           |
| EpThisIter      | 1              |
| EpisodesSoFar   | 7007           |
| TimeElapsed     | 2.88e+04       |
| TimestepsSoFar  | 19038208       |
| ev_tdlam_before | 0.035          |
| loss_ent        | 0.7611459      |
| loss_kl         | 2.8119663e-05  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00070365216 |
| loss_vf_loss    | 0.0018539764   |
------------------------------------
********** Iteration 4648 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00011 |   

********** Iteration 4653 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     7.14e-05 |       0.00000 |       0.00320 |      3.69e-05 |       0.74227
     -0.00046 |       0.00000 |       0.00309 |      3.24e-05 |       0.74199
     -0.00059 |       0.00000 |       0.00299 |      3.36e-05 |       0.74213
     -0.00066 |       0.00000 |       0.00292 |      2.67e-05 |       0.74218
     -0.00072 |       0.00000 |       0.00285 |      2.76e-05 |       0.74235
     -0.00048 |       0.00000 |       0.00279 |       0.00014 |       0.74306
     -0.00077 |       0.00000 |       0.00272 |      7.77e-05 |       0.74295
     -0.00082 |       0.00000 |       0.00267 |      5.38e-05 |       0.74269
     -0.00085 |       0.00000 |       0.00263 |      4.94e-05 |       0.74266
     -0.00087 |       0.00000 |       0.00259 |      4.48e-05 |       0.74250
Evaluating losses...
     -0.00091 |       0.00000 |       0.00257 |      4.54e-05 |     

     -0.00083 |       0.00000 |      4.76e-05 |      2.88e-05 |       0.76736
     -0.00092 |       0.00000 |      4.66e-05 |      3.08e-05 |       0.76753
     -0.00092 |       0.00000 |      4.54e-05 |      2.87e-05 |       0.76752
Evaluating losses...
     -0.00105 |       0.00000 |      4.49e-05 |      2.82e-05 |       0.76747
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.46          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 7022          |
| TimeElapsed     | 2.88e+04      |
| TimestepsSoFar  | 19083264      |
| ev_tdlam_before | -0.939        |
| loss_ent        | 0.7674665     |
| loss_kl         | 2.818194e-05  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0010528365 |
| loss_vf_loss    | 4.4856457e-05 |
-----------------------------------
********** Iteration 4659 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00026 |       0.00000 | 

********** Iteration 4664 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00021 |       0.00000 |       0.00165 |      9.59e-06 |       0.79126
     -0.00026 |       0.00000 |       0.00129 |      2.61e-05 |       0.79146
     -0.00023 |       0.00000 |       0.00107 |      4.28e-05 |       0.79119
     -0.00046 |       0.00000 |       0.00096 |      3.78e-05 |       0.79140
     -0.00060 |       0.00000 |       0.00087 |      2.91e-05 |       0.79128
     -0.00046 |       0.00000 |       0.00082 |      3.74e-05 |       0.79120
     -0.00061 |       0.00000 |       0.00078 |      5.52e-05 |       0.79125
     -0.00058 |       0.00000 |       0.00075 |      4.19e-05 |       0.79092
     -0.00074 |       0.00000 |       0.00073 |      3.94e-05 |       0.79115
     -0.00061 |       0.00000 |       0.00071 |      4.05e-05 |       0.79097
Evaluating losses...
     -0.00082 |       0.00000 |       0.00070 |      4.95e-05 |     

     -0.00052 |       0.00000 |       0.00061 |      1.52e-05 |       0.74754
     -0.00052 |       0.00000 |       0.00056 |      1.55e-05 |       0.74755
     -0.00058 |       0.00000 |       0.00052 |      1.69e-05 |       0.74753
     -0.00054 |       0.00000 |       0.00048 |      1.77e-05 |       0.74764
Evaluating losses...
     -0.00064 |       0.00000 |       0.00047 |      1.74e-05 |       0.74754
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.38          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 7037          |
| TimeElapsed     | 2.89e+04      |
| TimestepsSoFar  | 19128320      |
| ev_tdlam_before | 0.673         |
| loss_ent        | 0.74754304    |
| loss_kl         | 1.7371838e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0006395248 |
| loss_vf_loss    | 0.0004670742  |
-----------------------------------
********** Iteration 4670 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 4675 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.94e-05 |       0.00000 |       0.00745 |      7.56e-06 |       0.81375
     -0.00043 |       0.00000 |       0.00699 |      1.14e-05 |       0.81378
     -0.00054 |       0.00000 |       0.00670 |      1.46e-05 |       0.81406
     -0.00065 |       0.00000 |       0.00648 |      1.75e-05 |       0.81402
     -0.00071 |       0.00000 |       0.00631 |      2.07e-05 |       0.81404
     -0.00077 |       0.00000 |       0.00617 |      2.07e-05 |       0.81417
     -0.00080 |       0.00000 |       0.00605 |      2.39e-05 |       0.81410
     -0.00085 |       0.00000 |       0.00596 |      2.64e-05 |       0.81414
     -0.00090 |       0.00000 |       0.00589 |      3.23e-05 |       0.81433
     -0.00093 |       0.00000 |       0.00581 |      3.13e-05 |       0.81419
Evaluating losses...
     -0.00101 |       0.00000 |       0.00576 |      2.99e-05 |     

     -0.00103 |       0.00000 |       0.00019 |      2.81e-05 |       0.83453
     -0.00106 |       0.00000 |       0.00018 |      2.75e-05 |       0.83442
     -0.00114 |       0.00000 |       0.00017 |      3.02e-05 |       0.83457
Evaluating losses...
     -0.00123 |       0.00000 |       0.00017 |      2.98e-05 |       0.83449
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.36          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 7052          |
| TimeElapsed     | 2.9e+04       |
| TimestepsSoFar  | 19173376      |
| ev_tdlam_before | -0.24         |
| loss_ent        | 0.8344913     |
| loss_kl         | 2.9779012e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.001232632  |
| loss_vf_loss    | 0.0001703596  |
-----------------------------------
********** Iteration 4681 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.16e-05 |       0.00000 | 

********** Iteration 4686 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -2.19e-05 |       0.00000 |       0.00343 |      1.24e-05 |       0.77077
     -0.00040 |       0.00000 |       0.00331 |      2.61e-05 |       0.77031
     -0.00048 |       0.00000 |       0.00324 |      3.93e-05 |       0.77072
     -0.00059 |       0.00000 |       0.00318 |      2.65e-05 |       0.77048
     -0.00066 |       0.00000 |       0.00314 |      2.43e-05 |       0.77028
     -0.00070 |       0.00000 |       0.00310 |      2.53e-05 |       0.77011
     -0.00070 |       0.00000 |       0.00307 |      2.54e-05 |       0.77019
     -0.00071 |       0.00000 |       0.00303 |      2.96e-05 |       0.77036
     -0.00076 |       0.00000 |       0.00300 |      2.81e-05 |       0.77018
     -0.00077 |       0.00000 |       0.00297 |      2.68e-05 |       0.77027
Evaluating losses...
     -0.00080 |       0.00000 |       0.00295 |      2.76e-05 |     

     -0.00042 |       0.00000 |       0.00125 |      1.42e-05 |       0.76476
     -0.00040 |       0.00000 |       0.00124 |      2.77e-05 |       0.76474
     -0.00046 |       0.00000 |       0.00123 |      1.98e-05 |       0.76466
     -0.00049 |       0.00000 |       0.00122 |      2.26e-05 |       0.76466
Evaluating losses...
     -0.00039 |       0.00000 |       0.00122 |      8.04e-05 |       0.76421
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.23          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 7067          |
| TimeElapsed     | 2.9e+04       |
| TimestepsSoFar  | 19218432      |
| ev_tdlam_before | 0.62          |
| loss_ent        | 0.764213      |
| loss_kl         | 8.038476e-05  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0003933002 |
| loss_vf_loss    | 0.0012158639  |
-----------------------------------
********** Iteration 4692 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 4697 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.53e-05 |       0.00000 |       0.00192 |      4.45e-06 |       0.77568
     -0.00022 |       0.00000 |       0.00180 |      8.60e-06 |       0.77557
     -0.00030 |       0.00000 |       0.00171 |      7.53e-06 |       0.77563
     -0.00039 |       0.00000 |       0.00163 |      1.02e-05 |       0.77550
     -0.00039 |       0.00000 |       0.00156 |      1.04e-05 |       0.77550
     -0.00049 |       0.00000 |       0.00150 |      1.11e-05 |       0.77555
     -0.00046 |       0.00000 |       0.00146 |      1.34e-05 |       0.77557
     -0.00053 |       0.00000 |       0.00141 |      1.43e-05 |       0.77563
     -0.00051 |       0.00000 |       0.00138 |      1.55e-05 |       0.77555
     -0.00057 |       0.00000 |       0.00134 |      1.62e-05 |       0.77556
Evaluating losses...
     -0.00058 |       0.00000 |       0.00132 |      1.67e-05 |     

     -0.00075 |       0.00000 |      3.11e-05 |      1.66e-05 |       0.74787
     -0.00078 |       0.00000 |      3.04e-05 |      1.79e-05 |       0.74799
     -0.00079 |       0.00000 |      2.98e-05 |      1.94e-05 |       0.74797
     -0.00085 |       0.00000 |      2.92e-05 |      2.03e-05 |       0.74790
     -0.00090 |       0.00000 |      2.88e-05 |      2.04e-05 |       0.74792
Evaluating losses...
     -0.00097 |       0.00000 |      2.87e-05 |      2.03e-05 |       0.74781
------------------------------------
| EpLenMean       | 3.02e+03       |
| EpRewMean       | 0.23           |
| EpThisIter      | 1              |
| EpisodesSoFar   | 7082           |
| TimeElapsed     | 2.95e+04       |
| TimestepsSoFar  | 19263488       |
| ev_tdlam_before | -1.45          |
| loss_ent        | 0.74780595     |
| loss_kl         | 2.0304373e-05  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00096935406 |
| loss_vf_loss    | 2.865269e-05   |
------------------------------

********** Iteration 4708 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
      0.00019 |       0.00000 |       0.00030 |      4.89e-06 |       0.77704
     -0.00011 |       0.00000 |       0.00028 |      7.68e-06 |       0.77694
     -0.00014 |       0.00000 |       0.00027 |      7.46e-06 |       0.77692
     -0.00021 |       0.00000 |       0.00026 |      8.67e-06 |       0.77688
     -0.00023 |       0.00000 |       0.00025 |      9.91e-06 |       0.77692
     -0.00037 |       0.00000 |       0.00025 |      1.14e-05 |       0.77704
     -0.00030 |       0.00000 |       0.00024 |      1.11e-05 |       0.77683
     -0.00041 |       0.00000 |       0.00023 |      1.34e-05 |       0.77701
     -0.00030 |       0.00000 |       0.00023 |      1.37e-05 |       0.77686
     -0.00044 |       0.00000 |       0.00022 |      1.50e-05 |       0.77691
Evaluating losses...
     -0.00049 |       0.00000 |       0.00022 |      1.67e-05 |     

     -0.00043 |       0.00000 |       0.00137 |      1.43e-05 |       0.76753
     -0.00043 |       0.00000 |       0.00135 |      1.43e-05 |       0.76759
     -0.00046 |       0.00000 |       0.00133 |      1.34e-05 |       0.76765
     -0.00041 |       0.00000 |       0.00132 |      1.54e-05 |       0.76768
Evaluating losses...
     -0.00051 |       0.00000 |       0.00131 |      1.62e-05 |       0.76782
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.24           |
| EpThisIter      | 1              |
| EpisodesSoFar   | 7097           |
| TimeElapsed     | 2.96e+04       |
| TimestepsSoFar  | 19308544       |
| ev_tdlam_before | 0.662          |
| loss_ent        | 0.76781577     |
| loss_kl         | 1.6150225e-05  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00051019876 |
| loss_vf_loss    | 0.0013137724   |
------------------------------------
********** Iteration 4714 ************
Optimizing...
     pol_surr |   

********** Iteration 4719 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.74e-05 |       0.00000 |       0.00152 |      5.35e-06 |       0.77054
     -0.00022 |       0.00000 |       0.00145 |      7.30e-06 |       0.77062
     -0.00030 |       0.00000 |       0.00141 |      1.02e-05 |       0.77047
     -0.00039 |       0.00000 |       0.00138 |      1.10e-05 |       0.77049
     -0.00037 |       0.00000 |       0.00135 |      1.22e-05 |       0.77053
     -0.00041 |       0.00000 |       0.00133 |      1.38e-05 |       0.77053
     -0.00044 |       0.00000 |       0.00131 |      1.48e-05 |       0.77039
     -0.00047 |       0.00000 |       0.00130 |      1.59e-05 |       0.77038
     -0.00046 |       0.00000 |       0.00129 |      1.83e-05 |       0.77050
     -0.00048 |       0.00000 |       0.00127 |      1.65e-05 |       0.77058
Evaluating losses...
     -0.00055 |       0.00000 |       0.00126 |      1.58e-05 |     

     -0.00062 |       0.00000 |       0.00253 |      1.48e-05 |       0.79551
     -0.00064 |       0.00000 |       0.00250 |      1.50e-05 |       0.79558
     -0.00067 |       0.00000 |       0.00248 |      1.51e-05 |       0.79557
Evaluating losses...
     -0.00070 |       0.00000 |       0.00246 |      1.54e-05 |       0.79559
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.28          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 7112          |
| TimeElapsed     | 2.96e+04      |
| TimestepsSoFar  | 19353600      |
| ev_tdlam_before | 0.52          |
| loss_ent        | 0.7955879     |
| loss_kl         | 1.5435598e-05 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0007013139 |
| loss_vf_loss    | 0.0024564532  |
-----------------------------------
********** Iteration 4725 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     9.19e-05 |       0.00000 | 

********** Iteration 4730 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     5.76e-06 |       0.00000 |       0.00011 |      4.61e-06 |       0.76952
     -0.00032 |       0.00000 |       0.00010 |      9.70e-06 |       0.76951
     -0.00045 |       0.00000 |      9.47e-05 |      8.88e-06 |       0.76957
     -0.00054 |       0.00000 |      9.06e-05 |      1.16e-05 |       0.76951
     -0.00059 |       0.00000 |      8.71e-05 |      1.32e-05 |       0.76952
     -0.00066 |       0.00000 |      8.41e-05 |      1.31e-05 |       0.76953
     -0.00071 |       0.00000 |      8.12e-05 |      1.48e-05 |       0.76946
     -0.00074 |       0.00000 |      7.89e-05 |      1.49e-05 |       0.76952
     -0.00077 |       0.00000 |      7.67e-05 |      1.68e-05 |       0.76946
     -0.00081 |       0.00000 |      7.47e-05 |      1.65e-05 |       0.76947
Evaluating losses...
     -0.00088 |       0.00000 |      7.36e-05 |      1.63e-05 |     

     -0.00058 |       0.00000 |       0.00204 |      8.88e-06 |       0.76695
     -0.00062 |       0.00000 |       0.00199 |      9.19e-06 |       0.76694
     -0.00063 |       0.00000 |       0.00194 |      9.45e-06 |       0.76690
Evaluating losses...
     -0.00065 |       0.00000 |       0.00191 |      9.79e-06 |       0.76698
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.32          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 7127          |
| TimeElapsed     | 2.97e+04      |
| TimestepsSoFar  | 19398656      |
| ev_tdlam_before | 0.585         |
| loss_ent        | 0.76697624    |
| loss_kl         | 9.78682e-06   |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0006545973 |
| loss_vf_loss    | 0.0019071269  |
-----------------------------------
********** Iteration 4736 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     8.67e-05 |       0.00000 | 

********** Iteration 4741 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     4.81e-05 |       0.00000 |       0.00085 |      3.02e-06 |       0.75701
     -0.00014 |       0.00000 |       0.00070 |      2.99e-06 |       0.75697
     -0.00014 |       0.00000 |       0.00061 |      5.09e-06 |       0.75705
     -0.00020 |       0.00000 |       0.00055 |      5.81e-06 |       0.75713
     -0.00022 |       0.00000 |       0.00051 |      7.13e-06 |       0.75707
     -0.00028 |       0.00000 |       0.00048 |      7.66e-06 |       0.75713
     -0.00025 |       0.00000 |       0.00045 |      9.35e-06 |       0.75708
     -0.00032 |       0.00000 |       0.00043 |      8.78e-06 |       0.75719
     -0.00032 |       0.00000 |       0.00041 |      9.73e-06 |       0.75720
     -0.00035 |       0.00000 |       0.00040 |      9.94e-06 |       0.75720
Evaluating losses...
     -0.00035 |       0.00000 |       0.00039 |      1.05e-05 |     

     -0.00025 |       0.00000 |       0.00118 |      7.92e-06 |       0.77085
     -0.00031 |       0.00000 |       0.00117 |      1.05e-05 |       0.77068
     -0.00035 |       0.00000 |       0.00116 |      1.07e-05 |       0.77065
     -0.00035 |       0.00000 |       0.00115 |      9.33e-06 |       0.77073
Evaluating losses...
     -0.00041 |       0.00000 |       0.00114 |      9.32e-06 |       0.77066
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.26           |
| EpThisIter      | 1              |
| EpisodesSoFar   | 7142           |
| TimeElapsed     | 2.98e+04       |
| TimestepsSoFar  | 19443712       |
| ev_tdlam_before | 0.675          |
| loss_ent        | 0.7706636      |
| loss_kl         | 9.324463e-06   |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00040508027 |
| loss_vf_loss    | 0.0011416773   |
------------------------------------
********** Iteration 4747 ************
Optimizing...
     pol_surr |   

********** Iteration 4752 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.10e-05 |       0.00000 |       0.00298 |      3.87e-06 |       0.78845
     -0.00028 |       0.00000 |       0.00289 |      5.49e-06 |       0.78843
     -0.00039 |       0.00000 |       0.00284 |      7.28e-06 |       0.78851
     -0.00044 |       0.00000 |       0.00281 |      7.58e-06 |       0.78825
     -0.00046 |       0.00000 |       0.00278 |      7.38e-06 |       0.78838
     -0.00049 |       0.00000 |       0.00276 |      7.75e-06 |       0.78842
     -0.00056 |       0.00000 |       0.00274 |      8.91e-06 |       0.78833
     -0.00051 |       0.00000 |       0.00272 |      8.87e-06 |       0.78837
     -0.00058 |       0.00000 |       0.00271 |      9.38e-06 |       0.78839
     -0.00055 |       0.00000 |       0.00269 |      1.12e-05 |       0.78846
Evaluating losses...
     -0.00058 |       0.00000 |       0.00268 |      1.18e-05 |     

     -0.00034 |       0.00000 |       0.00126 |      8.43e-06 |       0.79969
     -0.00038 |       0.00000 |       0.00121 |      8.74e-06 |       0.79965
     -0.00039 |       0.00000 |       0.00117 |      9.06e-06 |       0.79964
     -0.00044 |       0.00000 |       0.00113 |      9.83e-06 |       0.79962
Evaluating losses...
     -0.00044 |       0.00000 |       0.00110 |      9.99e-06 |       0.79961
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.36          |
| EpThisIter      | 1             |
| EpisodesSoFar   | 7157          |
| TimeElapsed     | 3e+04         |
| TimestepsSoFar  | 19488768      |
| ev_tdlam_before | 0.58          |
| loss_ent        | 0.79960966    |
| loss_kl         | 9.993273e-06  |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0004405859 |
| loss_vf_loss    | 0.0011035716  |
-----------------------------------
********** Iteration 4758 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 4763 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -4.76e-05 |       0.00000 |       0.00438 |      2.39e-05 |       0.76535
     -0.00023 |       0.00000 |       0.00425 |      3.91e-05 |       0.76540
     -0.00030 |       0.00000 |       0.00417 |      1.66e-05 |       0.76534
     -0.00035 |       0.00000 |       0.00410 |      1.12e-05 |       0.76522
     -0.00040 |       0.00000 |       0.00405 |      9.20e-06 |       0.76532
     -0.00043 |       0.00000 |       0.00401 |      1.04e-05 |       0.76512
     -0.00046 |       0.00000 |       0.00397 |      1.79e-05 |       0.76531
     -0.00049 |       0.00000 |       0.00394 |      1.21e-05 |       0.76523
     -0.00050 |       0.00000 |       0.00392 |      1.25e-05 |       0.76515
     -0.00052 |       0.00000 |       0.00390 |      1.47e-05 |       0.76524
Evaluating losses...
     -0.00056 |       0.00000 |       0.00388 |      1.46e-05 |     

     -0.00043 |       0.00000 |      9.40e-05 |      5.84e-06 |       0.74682
     -0.00049 |       0.00000 |      9.13e-05 |      6.31e-06 |       0.74683
     -0.00054 |       0.00000 |      8.86e-05 |      6.92e-06 |       0.74686
     -0.00055 |       0.00000 |      8.60e-05 |      7.47e-06 |       0.74681
Evaluating losses...
     -0.00060 |       0.00000 |      8.48e-05 |      7.59e-06 |       0.74692
------------------------------------
| EpLenMean       | 3.02e+03       |
| EpRewMean       | 0.37           |
| EpThisIter      | 2              |
| EpisodesSoFar   | 7172           |
| TimeElapsed     | 3.03e+04       |
| TimestepsSoFar  | 19533824       |
| ev_tdlam_before | -1.23          |
| loss_ent        | 0.7469178      |
| loss_kl         | 7.5856806e-06  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00060334906 |
| loss_vf_loss    | 8.478512e-05   |
------------------------------------
********** Iteration 4769 ************
Optimizing...
     pol_surr |   

********** Iteration 4774 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     3.68e-05 |       0.00000 |       0.00024 |      9.94e-07 |       0.78661
     -0.00017 |       0.00000 |       0.00021 |      2.97e-06 |       0.78654
     -0.00025 |       0.00000 |       0.00019 |      4.09e-06 |       0.78656
     -0.00032 |       0.00000 |       0.00018 |      4.26e-06 |       0.78658
     -0.00037 |       0.00000 |       0.00017 |      4.43e-06 |       0.78648
     -0.00040 |       0.00000 |       0.00016 |      5.00e-06 |       0.78654
     -0.00042 |       0.00000 |       0.00016 |      6.72e-06 |       0.78650
     -0.00047 |       0.00000 |       0.00015 |      6.16e-06 |       0.78647
     -0.00048 |       0.00000 |       0.00014 |      6.51e-06 |       0.78653
     -0.00051 |       0.00000 |       0.00014 |      7.07e-06 |       0.78651
Evaluating losses...
     -0.00056 |       0.00000 |       0.00014 |      7.08e-06 |     

     -0.00042 |       0.00000 |       0.00018 |      4.62e-06 |       0.79617
     -0.00045 |       0.00000 |       0.00018 |      5.14e-06 |       0.79618
     -0.00047 |       0.00000 |       0.00017 |      5.79e-06 |       0.79617
     -0.00049 |       0.00000 |       0.00017 |      5.93e-06 |       0.79614
Evaluating losses...
     -0.00052 |       0.00000 |       0.00016 |      6.57e-06 |       0.79623
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.36           |
| EpThisIter      | 2              |
| EpisodesSoFar   | 7187           |
| TimeElapsed     | 3.03e+04       |
| TimestepsSoFar  | 19578880       |
| ev_tdlam_before | -2.63          |
| loss_ent        | 0.7962275      |
| loss_kl         | 6.571119e-06   |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00052132877 |
| loss_vf_loss    | 0.00016424486  |
------------------------------------
********** Iteration 4780 ************
Optimizing...
     pol_surr |   

********** Iteration 4785 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     1.69e-05 |       0.00000 |       0.00279 |      1.35e-06 |       0.78708
     -0.00013 |       0.00000 |       0.00264 |      2.44e-06 |       0.78704
     -0.00019 |       0.00000 |       0.00254 |      3.21e-06 |       0.78695
     -0.00023 |       0.00000 |       0.00247 |      3.81e-06 |       0.78695
     -0.00027 |       0.00000 |       0.00240 |      3.74e-06 |       0.78691
     -0.00029 |       0.00000 |       0.00234 |      3.77e-06 |       0.78693
     -0.00032 |       0.00000 |       0.00229 |      4.15e-06 |       0.78695
     -0.00033 |       0.00000 |       0.00224 |      4.00e-06 |       0.78695
     -0.00035 |       0.00000 |       0.00220 |      4.58e-06 |       0.78698
     -0.00035 |       0.00000 |       0.00216 |      5.02e-06 |       0.78698
Evaluating losses...
     -0.00039 |       0.00000 |       0.00214 |      4.87e-06 |     

     -0.00032 |       0.00000 |       0.00228 |      4.68e-06 |       0.78560
     -0.00033 |       0.00000 |       0.00226 |      4.85e-06 |       0.78564
     -0.00032 |       0.00000 |       0.00224 |      5.90e-06 |       0.78568
     -0.00032 |       0.00000 |       0.00222 |      7.75e-06 |       0.78570
Evaluating losses...
     -0.00035 |       0.00000 |       0.00221 |      6.97e-06 |       0.78571
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.4           |
| EpThisIter      | 2             |
| EpisodesSoFar   | 7202          |
| TimeElapsed     | 3.04e+04      |
| TimestepsSoFar  | 19623936      |
| ev_tdlam_before | 0.152         |
| loss_ent        | 0.78571236    |
| loss_kl         | 6.9697517e-06 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0003504221 |
| loss_vf_loss    | 0.002210358   |
-----------------------------------
********** Iteration 4791 ************
Optimizing...
     pol_surr |    pol_entpen | 

********** Iteration 4796 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     5.80e-05 |       0.00000 |      5.18e-05 |      1.55e-06 |       0.79373
     -0.00013 |       0.00000 |      4.77e-05 |      3.63e-06 |       0.79371
     -0.00020 |       0.00000 |      4.62e-05 |      4.13e-06 |       0.79380
     -0.00024 |       0.00000 |      4.52e-05 |      3.84e-06 |       0.79382
     -0.00029 |       0.00000 |      4.45e-05 |      3.93e-06 |       0.79387
     -0.00033 |       0.00000 |      4.38e-05 |      4.47e-06 |       0.79381
     -0.00034 |       0.00000 |      4.32e-05 |      5.04e-06 |       0.79387
     -0.00038 |       0.00000 |      4.27e-05 |      4.80e-06 |       0.79379
     -0.00040 |       0.00000 |      4.22e-05 |      5.02e-06 |       0.79385
     -0.00042 |       0.00000 |      4.17e-05 |      5.16e-06 |       0.79387
Evaluating losses...
     -0.00047 |       0.00000 |      4.14e-05 |      5.14e-06 |     

     -0.00029 |       0.00000 |       0.00307 |      3.96e-06 |       0.76289
     -0.00029 |       0.00000 |       0.00301 |      4.45e-06 |       0.76283
     -0.00031 |       0.00000 |       0.00296 |      4.38e-06 |       0.76290
     -0.00032 |       0.00000 |       0.00291 |      4.61e-06 |       0.76290
Evaluating losses...
     -0.00033 |       0.00000 |       0.00288 |      4.72e-06 |       0.76295
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.37           |
| EpThisIter      | 2              |
| EpisodesSoFar   | 7217           |
| TimeElapsed     | 3.04e+04       |
| TimestepsSoFar  | 19668992       |
| ev_tdlam_before | 0.468          |
| loss_ent        | 0.76295453     |
| loss_kl         | 4.7230264e-06  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00033330888 |
| loss_vf_loss    | 0.0028823763   |
------------------------------------
********** Iteration 4802 ************
Optimizing...
     pol_surr |   

********** Iteration 4807 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     8.94e-05 |       0.00000 |       0.00160 |      7.22e-07 |       0.77958
     -0.00010 |       0.00000 |       0.00150 |      2.17e-06 |       0.77962
     -0.00017 |       0.00000 |       0.00144 |      2.73e-06 |       0.77961
     -0.00022 |       0.00000 |       0.00138 |      2.45e-06 |       0.77958
     -0.00024 |       0.00000 |       0.00132 |      2.49e-06 |       0.77953
     -0.00021 |       0.00000 |       0.00128 |      2.68e-06 |       0.77955
     -0.00026 |       0.00000 |       0.00123 |      3.10e-06 |       0.77948
     -0.00029 |       0.00000 |       0.00119 |      3.26e-06 |       0.77953
     -0.00030 |       0.00000 |       0.00116 |      3.53e-06 |       0.77950
     -0.00029 |       0.00000 |       0.00113 |      3.68e-06 |       0.77949
Evaluating losses...
     -0.00032 |       0.00000 |       0.00111 |      3.98e-06 |     

     -0.00035 |       0.00000 |      9.29e-05 |      2.96e-06 |       0.79387
     -0.00038 |       0.00000 |      9.20e-05 |      3.19e-06 |       0.79388
     -0.00039 |       0.00000 |      9.12e-05 |      3.13e-06 |       0.79394
     -0.00040 |       0.00000 |      9.04e-05 |      3.63e-06 |       0.79393
Evaluating losses...
     -0.00043 |       0.00000 |      8.98e-05 |      3.40e-06 |       0.79399
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.38           |
| EpThisIter      | 2              |
| EpisodesSoFar   | 7232           |
| TimeElapsed     | 3.05e+04       |
| TimestepsSoFar  | 19714048       |
| ev_tdlam_before | -0.169         |
| loss_ent        | 0.79399335     |
| loss_kl         | 3.4024474e-06  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00043444894 |
| loss_vf_loss    | 8.981688e-05   |
------------------------------------
********** Iteration 4813 ************
Optimizing...
     pol_surr |   

********** Iteration 4818 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.04e-05 |       0.00000 |       0.00214 |      5.15e-07 |       0.78020
     -0.00011 |       0.00000 |       0.00212 |      1.94e-06 |       0.78029
     -0.00014 |       0.00000 |       0.00209 |      1.84e-06 |       0.78028
     -0.00016 |       0.00000 |       0.00207 |      2.18e-06 |       0.78033
     -0.00017 |       0.00000 |       0.00206 |      1.81e-06 |       0.78030
     -0.00019 |       0.00000 |       0.00204 |      2.12e-06 |       0.78031
     -0.00020 |       0.00000 |       0.00202 |      1.94e-06 |       0.78029
     -0.00020 |       0.00000 |       0.00201 |      2.11e-06 |       0.78032
     -0.00021 |       0.00000 |       0.00199 |      2.36e-06 |       0.78025
     -0.00022 |       0.00000 |       0.00198 |      2.33e-06 |       0.78028
Evaluating losses...
     -0.00023 |       0.00000 |       0.00197 |      2.22e-06 |     

     -0.00017 |       0.00000 |       0.00177 |      1.79e-06 |       0.75262
     -0.00017 |       0.00000 |       0.00175 |      1.82e-06 |       0.75265
     -0.00018 |       0.00000 |       0.00174 |      1.83e-06 |       0.75269
     -0.00019 |       0.00000 |       0.00172 |      1.94e-06 |       0.75264
     -0.00020 |       0.00000 |       0.00170 |      2.10e-06 |       0.75267
Evaluating losses...
     -0.00021 |       0.00000 |       0.00170 |      2.23e-06 |       0.75272
------------------------------------
| EpLenMean       | 3.02e+03       |
| EpRewMean       | 0.35           |
| EpThisIter      | 2              |
| EpisodesSoFar   | 7247           |
| TimeElapsed     | 3.08e+04       |
| TimestepsSoFar  | 19759104       |
| ev_tdlam_before | 0.446          |
| loss_ent        | 0.75272167     |
| loss_kl         | 2.2257088e-06  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00021122431 |
| loss_vf_loss    | 0.001695659    |
------------------------------

********** Iteration 4829 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     3.07e-05 |       0.00000 |      5.26e-05 |      6.20e-07 |       0.73115
    -8.64e-05 |       0.00000 |      5.01e-05 |      7.78e-07 |       0.73110
     -0.00013 |       0.00000 |      4.85e-05 |      9.75e-07 |       0.73113
     -0.00015 |       0.00000 |      4.74e-05 |      1.02e-06 |       0.73110
     -0.00018 |       0.00000 |      4.65e-05 |      1.21e-06 |       0.73111
     -0.00018 |       0.00000 |      4.58e-05 |      1.41e-06 |       0.73115
     -0.00020 |       0.00000 |      4.51e-05 |      1.48e-06 |       0.73113
     -0.00020 |       0.00000 |      4.45e-05 |      1.59e-06 |       0.73116
     -0.00022 |       0.00000 |      4.39e-05 |      1.60e-06 |       0.73112
     -0.00023 |       0.00000 |      4.34e-05 |      1.68e-06 |       0.73114
Evaluating losses...
     -0.00026 |       0.00000 |      4.30e-05 |      1.69e-06 |     

     -0.00017 |       0.00000 |       0.00203 |      1.47e-06 |       0.78758
     -0.00018 |       0.00000 |       0.00201 |      1.39e-06 |       0.78757
     -0.00019 |       0.00000 |       0.00199 |      1.47e-06 |       0.78759
     -0.00020 |       0.00000 |       0.00197 |      1.47e-06 |       0.78758
Evaluating losses...
     -0.00021 |       0.00000 |       0.00196 |      1.50e-06 |       0.78760
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.22           |
| EpThisIter      | 2              |
| EpisodesSoFar   | 7262           |
| TimeElapsed     | 3.09e+04       |
| TimestepsSoFar  | 19804160       |
| ev_tdlam_before | 0.461          |
| loss_ent        | 0.7876039      |
| loss_kl         | 1.4957075e-06  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00020923931 |
| loss_vf_loss    | 0.0019615293   |
------------------------------------
********** Iteration 4835 ************
Optimizing...
     pol_surr |   

********** Iteration 4840 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     3.32e-05 |       0.00000 |       0.00326 |      4.55e-07 |       0.75626
    -6.12e-05 |       0.00000 |       0.00323 |      6.75e-07 |       0.75632
    -7.02e-05 |       0.00000 |       0.00320 |      1.15e-06 |       0.75632
    -9.34e-05 |       0.00000 |       0.00317 |      8.60e-07 |       0.75635
    -9.68e-05 |       0.00000 |       0.00315 |      9.34e-07 |       0.75637
     -0.00012 |       0.00000 |       0.00312 |      1.02e-06 |       0.75636
     -0.00010 |       0.00000 |       0.00310 |      1.44e-06 |       0.75636
     -0.00012 |       0.00000 |       0.00308 |      1.28e-06 |       0.75640
     -0.00012 |       0.00000 |       0.00306 |      1.44e-06 |       0.75641
     -0.00014 |       0.00000 |       0.00304 |      1.54e-06 |       0.75635
Evaluating losses...
     -0.00015 |       0.00000 |       0.00303 |      1.42e-06 |     

     -0.00012 |       0.00000 |       0.00065 |      5.98e-07 |       0.76211
     -0.00013 |       0.00000 |       0.00064 |      6.33e-07 |       0.76207
     -0.00014 |       0.00000 |       0.00062 |      7.09e-07 |       0.76208
     -0.00014 |       0.00000 |       0.00061 |      7.40e-07 |       0.76206
     -0.00015 |       0.00000 |       0.00059 |      7.68e-07 |       0.76207
Evaluating losses...
     -0.00016 |       0.00000 |       0.00059 |      7.93e-07 |       0.76204
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.22          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 7277          |
| TimeElapsed     | 3.1e+04       |
| TimestepsSoFar  | 19849216      |
| ev_tdlam_before | -1.14         |
| loss_ent        | 0.7620389     |
| loss_kl         | 7.9327475e-07 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -0.0001571139 |
| loss_vf_loss    | 0.00058688095 |
-----------------------------------
*******

********** Iteration 4851 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     2.11e-05 |       0.00000 |      6.56e-05 |      2.83e-07 |       0.74108
    -6.29e-05 |       0.00000 |      6.04e-05 |      3.88e-07 |       0.74109
    -8.84e-05 |       0.00000 |      5.68e-05 |      4.40e-07 |       0.74111
     -0.00011 |       0.00000 |      5.39e-05 |      4.38e-07 |       0.74110
     -0.00013 |       0.00000 |      5.15e-05 |      5.25e-07 |       0.74110
     -0.00014 |       0.00000 |      4.95e-05 |      5.44e-07 |       0.74111
     -0.00015 |       0.00000 |      4.78e-05 |      6.26e-07 |       0.74111
     -0.00016 |       0.00000 |      4.64e-05 |      6.70e-07 |       0.74111
     -0.00017 |       0.00000 |      4.52e-05 |      7.06e-07 |       0.74112
     -0.00017 |       0.00000 |      4.42e-05 |      7.37e-07 |       0.74110
Evaluating losses...
     -0.00020 |       0.00000 |      4.38e-05 |      7.42e-07 |     

     -0.00010 |       0.00000 |       0.00313 |      4.33e-07 |       0.77099
     -0.00010 |       0.00000 |       0.00310 |      4.32e-07 |       0.77099
     -0.00010 |       0.00000 |       0.00308 |      4.72e-07 |       0.77101
     -0.00011 |       0.00000 |       0.00305 |      5.10e-07 |       0.77101
Evaluating losses...
     -0.00012 |       0.00000 |       0.00304 |      5.18e-07 |       0.77101
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.29           |
| EpThisIter      | 2              |
| EpisodesSoFar   | 7292           |
| TimeElapsed     | 3.11e+04       |
| TimestepsSoFar  | 19894272       |
| ev_tdlam_before | 0.613          |
| loss_ent        | 0.77101296     |
| loss_kl         | 5.183881e-07   |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -0.00011913048 |
| loss_vf_loss    | 0.0030383128   |
------------------------------------
********** Iteration 4857 ************
Optimizing...
     pol_surr |   

********** Iteration 4862 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     6.69e-06 |       0.00000 |       0.00038 |      8.63e-08 |       0.76145
    -3.74e-05 |       0.00000 |       0.00038 |      1.22e-07 |       0.76144
    -4.65e-05 |       0.00000 |       0.00037 |      1.55e-07 |       0.76145
    -5.63e-05 |       0.00000 |       0.00036 |      1.51e-07 |       0.76146
    -6.53e-05 |       0.00000 |       0.00035 |      1.72e-07 |       0.76144
    -6.80e-05 |       0.00000 |       0.00035 |      1.99e-07 |       0.76145
    -7.42e-05 |       0.00000 |       0.00034 |      2.19e-07 |       0.76144
    -7.70e-05 |       0.00000 |       0.00033 |      2.38e-07 |       0.76144
    -8.18e-05 |       0.00000 |       0.00033 |      2.40e-07 |       0.76144
    -8.20e-05 |       0.00000 |       0.00032 |      2.68e-07 |       0.76144
Evaluating losses...
    -9.05e-05 |       0.00000 |       0.00032 |      2.74e-07 |     

    -7.08e-05 |       0.00000 |       0.00036 |      1.51e-07 |       0.79003
    -7.27e-05 |       0.00000 |       0.00035 |      1.65e-07 |       0.79002
    -7.79e-05 |       0.00000 |       0.00035 |      1.71e-07 |       0.79003
Evaluating losses...
    -8.28e-05 |       0.00000 |       0.00035 |      1.85e-07 |       0.79003
-----------------------------------
| EpLenMean       | 3.01e+03      |
| EpRewMean       | 0.18          |
| EpThisIter      | 2             |
| EpisodesSoFar   | 7307          |
| TimeElapsed     | 3.11e+04      |
| TimestepsSoFar  | 19939328      |
| ev_tdlam_before | -1.2          |
| loss_ent        | 0.7900278     |
| loss_kl         | 1.8460429e-07 |
| loss_pol_entpen | 0.0           |
| loss_pol_surr   | -8.284056e-05 |
| loss_vf_loss    | 0.00034775908 |
-----------------------------------
********** Iteration 4868 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
    -1.32e-06 |       0.00000 | 

********** Iteration 4873 ************
Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     4.67e-06 |       0.00000 |       0.00360 |      1.20e-08 |       0.77053
    -1.46e-05 |       0.00000 |       0.00357 |      2.55e-08 |       0.77053
    -2.29e-05 |       0.00000 |       0.00356 |      3.64e-08 |       0.77052
    -2.59e-05 |       0.00000 |       0.00355 |      3.89e-08 |       0.77052
    -2.84e-05 |       0.00000 |       0.00354 |      4.22e-08 |       0.77052
    -3.03e-05 |       0.00000 |       0.00354 |      4.40e-08 |       0.77052
    -2.93e-05 |       0.00000 |       0.00353 |      5.09e-08 |       0.77052
    -3.34e-05 |       0.00000 |       0.00353 |      5.12e-08 |       0.77052
    -3.40e-05 |       0.00000 |       0.00352 |      5.30e-08 |       0.77052
    -3.59e-05 |       0.00000 |       0.00352 |      5.09e-08 |       0.77053
Evaluating losses...
    -3.98e-05 |       0.00000 |       0.00351 |      5.87e-08 |     

    -1.44e-05 |       0.00000 |       0.00298 |      1.38e-08 |       0.75267
    -1.48e-05 |       0.00000 |       0.00297 |      1.56e-08 |       0.75267
    -1.54e-05 |       0.00000 |       0.00297 |      1.27e-08 |       0.75267
    -1.51e-05 |       0.00000 |       0.00296 |      1.85e-08 |       0.75267
Evaluating losses...
    -1.58e-05 |       0.00000 |       0.00296 |      1.99e-08 |       0.75267
------------------------------------
| EpLenMean       | 3.01e+03       |
| EpRewMean       | 0.19           |
| EpThisIter      | 2              |
| EpisodesSoFar   | 7322           |
| TimeElapsed     | 3.12e+04       |
| TimestepsSoFar  | 19984384       |
| ev_tdlam_before | 0.265          |
| loss_ent        | 0.75267184     |
| loss_kl         | 1.9908953e-08  |
| loss_pol_entpen | 0.0            |
| loss_pol_surr   | -1.5771133e-05 |
| loss_vf_loss    | 0.0029577382   |
------------------------------------
********** Iteration 4879 ************
Optimizing...
     pol_surr |   

<stable_baselines.ppo1.pposgd_simple.PPO1 at 0x15ec2ff1b88>

### Save the model!!

In [14]:
model.save("ppo1\slimevolley_model")

In [15]:
model.save("ppo1\slimevolley_model_5000")

## running the same using the trained policy

In [20]:
obs = env.reset()
for i in range(1000):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
env.close()

### Recording the game

In [32]:
from stable_baselines.common.vec_env import VecVideoRecorder, DummyVecEnv
import imageio
import numpy as np

In [36]:
images = []
img = model.env.render(mode='rgb_array')
for i in range(550):
    images.append(img)
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    img = model.env.render(mode='rgb_array')
    
imageio.mimsave('slimevolley.gif', [np.array(img[0]) for i, img in enumerate(images) if i%2 == 0], fps=50)

In [37]:
# Video
env_id = 'SlimeVolley-v0'
video_folder = 'ppo1/videos/'
video_length = 100

# env = DummyVecEnv([lambda: gym.make(env_id)])

# Record the video starting at the first step
env = VecVideoRecorder(env, video_folder,
                       record_video_trigger=lambda x: x == 0, video_length=video_length,
                       name_prefix="PPO_video")
for i in range(100):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()

AttributeError: 'SlimeVolleyEnv' object has no attribute 'num_envs'

## References:
https://arxiv.org/abs/1707.06347