In [1]:
import numpy as np
import os,sys


sys.path.append('../../../RL_lib/Agents')
sys.path.append('../../../RL_lib/Policies/PPO')
sys.path.append('../../../RL_lib/Policies/Common')
sys.path.append('../../../RL_lib/Utils')
sys.path.append('../../Env')
sys.path.append('../../Imaging')

%load_ext autoreload
%load_ext autoreload
%autoreload 2
%matplotlib nbagg
import os
print(os.getcwd())

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/Users/briangaudet/Study/Subjects/MachineLearning/Projects/RL4GNC/Exo_intercept/Experiments/Optimize_50cm


In [2]:
%%html
<style>
/* Let output areas size to their content, capped at max-height. */
.output_wrapper, .output {
    height:auto !important;
    max-height:1000px;  /* your desired max-height here */
}
/* Remove the box shadow from scrolled output areas. The vendor-prefixed
   property needs its leading hyphen to be recognised. */
.output_scroll {
    box-shadow:none !important;
    -webkit-box-shadow:none !important;
}
</style>

# Optimize Policy

In [4]:
from env import Env
import env_utils as envu
from reward_sensor_gaussian import Reward

import attitude_utils as attu

from missile import Missile
from target import Target

from missile_icgen import Missile_icgen
from target_icgen import Target_icgen

from dynamics_model_3dof import Dynamics_model_3dof as Target_dynamics_model
from dynamics_model_6dof import Dynamics_model_6dof as Missile_dynamics_model

from spiral_policy import Spiral_policy as Target_policy

from no_att_constraint import No_att_constraint
from no_w_constraint import No_w_constraint

######### RL vs PN ###########

# Master switch for this cell. True binds `Policy` to policy_ppo.Policy (with
# Softmax_pd imported as `PD`); False binds `Policy` to zem_policy.ZEM_policy.
# The flag is read again further down when configuring the sensor
# (ignore_fov_vio), the missile state function, the environment
# (terminate_on_vc), the policy construction, and whether training is run.
is_RL = True

########## RL ###########

# These modules are imported in both modes: the value function, architecture
# and agent are constructed regardless of the is_RL setting.
import rl_utils
from arch_policy_vf import Arch
import policy_nets as policy_nets
import valfunc_nets as vf_nets
from agent import Agent
from value_function import Value_function

# Only one of the two `Policy` classes is imported, so the name `Policy` used
# later refers to whichever implementation the flag selected. `PD` exists only
# in the RL configuration.
if is_RL:
    from policy_ppo import Policy
    from softmax_pd import Softmax_pd as PD
else:
    from zem_policy import ZEM_policy as Policy
    
######### Actuator Models #########

from actuator_model_ekv import Actuator_model_ekv as Missile_actuator_model 
from actuator_model_3dof import Actuator_model_3dof as Target_actuator_model

######## Sensor ##############

from angle_sensor import Angle_sensor
from eo_model import EO_model
import optics_utils as optu

# Attitude parameterization object; the same instance is handed to the sensor
# model, the target, the missile and both initial-condition generators.
ap = attu.Quaternion_attitude()

offset = np.asarray([0, 0])

# Geometry inputs for the electro-optical model.
# NOTE(review): C_cb / r_cb look like a camera-to-body rotation and position
# offset -- confirm against EO_model.
C_cb = optu.rotate_optical_axis(0.0, np.pi/2, 0.0)
r_cb = np.asarray([0, 0, 0])
fov = np.pi - np.pi/8

cm = EO_model(
    attitude_parameterization=ap,
    C_cb=C_cb,
    r_cb=r_cb,
    fov=fov,
    debug=False,
    p_x=96,
    p_y=96,
)

# ignore_fov_vio is the negation of is_RL, so it is only set in the
# non-RL configuration.
sensor = Angle_sensor(
    cm,
    attitude_parameterization=ap,
    use_range=True,
    ignore_fov_vio=not is_RL,
    use_ideal_offset=False,
    pool_type='max',
    state_type=Angle_sensor.optflow_state,
    optflow_scale=1.0,
)


########## Target ############

# Half-width, in degrees, of the v_theta / v_phi ranges given to Target_icgen.
target_voffset = 10
# 5 * 9.81 -- presumably 5 g in m/s^2; confirm units against the actuator model.
target_max_acc = 5*9.81
# Only referenced by the commented-out policy variant below.
target_max_acc_range = (0., target_max_acc)

target_dynamics_model = Target_dynamics_model(h=0.02, M=1e3)
target_actuator_model = Target_actuator_model(max_acc=target_max_acc)
#target_policy = Target_policy(3,max_acc_range=target_max_acc_range,tf=80)
target_policy = Target_policy(3, max_acc=target_max_acc, qp_range=(10, 50))
target = Target(
    target_policy,
    target_actuator_model,
    target_dynamics_model,
    attitude_parameterization=ap,
)

# The min and max initial positions are identical, as are the two v_mag bounds;
# only the two velocity angles are given a non-degenerate range.
target_icgen = Target_icgen(
    attitude_parameterization=ap,
    min_init_position=(0.0, 0.0, 50000.),
    max_init_position=(0.0, 0.0, 50000.),
    v_mag=(4000., 4000.),
    v_theta=(
        envu.deg2rad(90-target_voffset),
        envu.deg2rad(90+target_voffset),
    ),
    v_phi=(
        envu.deg2rad(-target_voffset),
        envu.deg2rad(target_voffset),
    ),
)

########## Missile  #############

# Half-width, in degrees, of the position_theta / position_phi ranges below.
missile_roffset = 10
missile_mass = 50
# 10 * 9.81 * mass -- presumably a 10 g thrust limit; confirm units.
missile_max_thrust = 10*9.81*missile_mass

missile_dynamics_model = Missile_dynamics_model(h=0.02, M=1e3)

missile_actuator_model = Missile_actuator_model(
    max_thrust=missile_max_thrust,
    pulsed=True,
)
missile = Missile(
    target,
    missile_actuator_model,
    missile_dynamics_model,
    sensor=sensor,
    attitude_parameterization=ap,
    w_constraint=No_w_constraint(),
    att_constraint=No_att_constraint(ap),
    align_cv=False,
    debug_cv=False,
    perturb_pn_velocity=True,
)
# In the non-RL configuration the agent-facing state function is swapped for
# the missile's get_state_agent_PN_att method.
if not is_RL:
    missile.get_state_agent = missile.get_state_agent_PN_att

# mag_v and attitude_error have identical lower and upper bounds; the position
# and heading_error arguments are given non-degenerate ranges.
missile_icgen = Missile_icgen(
    attitude_parameterization=ap,
    position_r=(50000., 55000.),
    position_theta=(
        envu.deg2rad(90-missile_roffset),
        envu.deg2rad(90+missile_roffset),
    ),
    position_phi=(
        envu.deg2rad(-missile_roffset),
        envu.deg2rad(missile_roffset),
    ),
    mag_v=(3000, 3000),
    heading_error=(envu.deg2rad(0), envu.deg2rad(5)),
    attitude_error=(0.0, 0.0),
    debug=False,
)


# Reward configuration: the fuel and field-of-view coefficients are zero, so
# only the hit and tracking coefficients are non-zero.
reward_object = Reward(
    debug=False,
    hit_coeff=10.,
    tracking_coeff=1.,
    tracking_sigma=0.10,
    optflow_sigma=0.04,
    fuel_coeff=0.0,
    fov_coeff=-0.,
    hit_rlimit=0.5,
)

logger = rl_utils.Logger()

from render_traj_paper_6dof2 import render_traj

# terminate_on_vc is the negation of is_RL, so it is only set in the
# non-RL configuration.
env = Env(
    missile,
    target,
    missile_icgen,
    target_icgen,
    logger,
    render_func=render_traj,
    precision_range=1000.,
    precision_scale=300,
    terminate_on_vc=not is_RL,
    reward_object=reward_object,
    use_offset=False,
    debug_steps=True,
    tf_limit=50.0,
    print_every=10,
    nav_period=0.04,
)

                

##########################################
# Passed as `recurrent_steps` to both GRU networks and to the agent.
recurrent_steps = 200
if is_RL:
    # Learned policy. The policy network maps obs_dim inputs to
    # logit_dim = action_dim * actions_per_dim outputs, and the Softmax_pd
    # distribution is built from the same two action sizes.
    obs_dim = 4
    action_dim = 4
    actions_per_dim = 2
    logit_dim = action_dim * actions_per_dim
    policy = Policy(policy_nets.GRU1(obs_dim, logit_dim, recurrent_steps=recurrent_steps),
                PD(action_dim, actions_per_dim),
                shuffle=False,
                kl_targ=0.001,epochs=20, beta=0.1, servo_kl=False, max_grad_norm=30, scale_vector_obs=True,
                init_func=rl_utils.xn_init)
else:
    # Non-RL configuration: `Policy` is zem_policy.ZEM_policy here.
    policy = Policy(ap=ap, N=5, max_acc=missile_max_thrust / missile_mass)
    obs_dim = 19
    # Use the same name as the RL branch so code following the if/else can
    # rely on `action_dim` in either mode. `act_dim` is kept as an alias so
    # any existing reference to the old name keeps working.
    action_dim = 4
    act_dim = action_dim

arch = Arch()

# The value function is constructed in both modes and takes the obs_dim chosen
# by whichever branch ran above.
value_function = Value_function(vf_nets.GRU1(obs_dim, recurrent_steps=recurrent_steps), scale_obs=True,
                                shuffle=False, batch_size=9999999, max_grad_norm=30,
                                verbose=False)

# NOTE(review): the meaning of the fourth positional argument (None) is defined
# by Agent -- confirm against its signature.
agent = Agent(arch, policy, value_function, None, env, logger,
              policy_episodes=30, policy_steps=3000, gamma1=0.90, gamma2=0.995,
              recurrent_steps=recurrent_steps, monitor=env.rl_stats)

# Training is only run for the learned policy.
# NOTE(review): whether 300000 counts episodes or steps is defined by
# Agent.train -- confirm.
if is_RL:
    agent.train(300000)

Quaternion_attitude
Euler321 Attitude
C_cb: 
[[ 6.123234e-17  0.000000e+00 -1.000000e+00]
 [ 0.000000e+00  1.000000e+00  0.000000e+00]
 [ 1.000000e+00  0.000000e+00  6.123234e-17]]
[1.000000e+00 0.000000e+00 6.123234e-17]
using max  pooling
Angle sensor:
	Output State type:  <function Angle_sensor.optflow_state at 0x154703158>
	Offset Init type:  <function Angle_sensor.offset_init1 at 0x154700d08>
	Fixed Offset:  None
3dof dynamics model
3-dof Actuator Model:  49.050000000000004
Inertia Tensor:  [[333.33333333   0.           0.        ]
 [  0.         333.33333333   0.        ]
 [  0.           0.         333.33333333]]
Target Model: 
 - foo:  0.0
6dof dynamics model
thruster model:  [4905. 4905. 4905. 4905.]
Rotational Velocity Constraint
Attitude Constraint
Inertia Tensor:  [[6.25       0.         0.        ]
 [0.         7.29166667 0.        ]
 [0.         0.         7.29166667]]
Missile Model: 
 - foo:  0.0
Reward_terminal


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Missile env fixed (h= 0.02
[[-1.]
 [ 1.]]
Policy with vectorized sample
	xn_init: layer  Linear(in_features=4, out_features=40, bias=True)
	xn_init: layer  GRUCell(40, 56)
	xn_init: layer  Linear(in_features=56, out_features=80, bias=True)
	xn_init: layer  Linear(in_features=80, out_features=8, bias=True)
Policy: recurrent steps > 1, disabling shuffle
	Test Mode:          False
	Clip Param:         0.1
	Shuffle :           False
	Shuffle by Chunks:  False
	Max Grad Norm:      30
	Recurrent Steps:    200
	Rollout Limit:      1
	Advantage Func:     <advantage_utils.Adv_default object at 0x15b014780>
	Advantage Norm:     <function Adv_normalizer.apply at 0x1546fa620>
	PD:                 <softmax_pd.Softmax_pd object at 0x15b014550>
	Loss Function:      <bound method Policy.calc_loss1 of <policy_ppo.Policy object at 0x15b0145c0>>
Value Funtion
	xn_init: layer  Linear(in_features=4, out_features=40, bias=True)
	xn_init: layer  GRUCell(40, 14)
	xn_init: layer  Linear(in_features=14, out_fea

ADV1:  -0.0005087754741293438 0.10838538435299241 0.39295556027940237 -0.841993612529398
ADV2:  0.020904495907879454 0.8160646476898034 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0379   0.0102   0.0477   0.0549   0.0494   0.0102
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0243   0.0100   0.0491   1.9276   0.8688   0.6518
***** Episode 217, Mean R = 156.0  Std R = 12.7  Min R = 133.5
PolicyLoss: -0.0255
Policy_Beta: 0.1
Policy_Entropy: 0.0948
Policy_KL: 0.0019
Policy_SD: 0.809
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 4.79e+04
VF_0_ExplainedVarNew: 0.896
VF_0_ExplainedVarOld: 0.887
VF_0_Loss : 0.0682


Dynamics: Max Disturbance (m/s^2):  [1.57580650e-12 2.64163194e-13 2.11518260e-12] 2.6508386871678875e-12
Dynamics: Max Disturbance (m/s^2):  [1.43736058e-12 2.45084683e-13 2.68837425e-12] 3.058337465151787e-12
ADV1:  0.008433462594433436 0.09775828576610253 0.455145001411438 -0.7235396053096217
ADV2:  0.009748804537014221 0.7618966137089163 3.0 -3.0
Policy

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0254   0.0037   0.0294   0.0699   0.0551   0.0116
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0061   0.0018   0.0093   1.9276   0.8688   0.6518
***** Episode 496, Mean R = 156.5  Std R = 15.0  Min R = 133.1
PolicyLoss: 0.00356
Policy_Beta: 0.1
Policy_Entropy: 0.0978
Policy_KL: 0.00118
Policy_SD: 0.807
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 1.02e+05
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.0951


ADV1:  0.002157550765821232 0.037730835438630914 0.23690787053958096 -0.4308464879464555
ADV2:  -0.0011312934245280327 0.715998648699374 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0501   0.0176   0.0969   0.0969   0.0551   0.0176
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0069   0.0025   0.0155   1.9276   0.8688   0.6518
***** Episode 527, Mean R = 157.0  Std R = 15.1  Min R = 132.2
PolicyLoss: -0.0107
Policy_Beta: 0.1
Policy_Entropy: 0.0988
Policy_KL: 0.00233
Policy_SD

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0012   0.0055   1.9276   0.8688   0.6518
***** Episode 775, Mean R = 157.8  Std R = 12.7  Min R = 134.3
PolicyLoss: -0.0098
Policy_Beta: 0.1
Policy_Entropy: 0.102
Policy_KL: 0.0013
Policy_SD: 0.812
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 1.56e+05
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.0915


ADV1:  0.001655122508264744 0.021416451228732068 0.23007731139659882 -0.12674818379801778
ADV2:  -0.023257591113050726 0.754303684345237 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0282   0.0020   0.0302   0.0969   0.0551   0.0176
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0012   0.0065   1.9276   0.8688   0.6518
***** Episode 806, Mean R = 156.6  Std R = 14.5  Min R = 130.4
PolicyLoss: 0.0102
Policy_Beta: 0.1
Policy_Entropy: 0.102
Policy_KL: 0.000835
Policy_SD: 0.814
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 1.62e+05
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedV

***** Episode 1054, Mean R = 154.5  Std R = 14.2  Min R = 129.8
PolicyLoss: -0.0233
Policy_Beta: 0.1
Policy_Entropy: 0.105
Policy_KL: 0.00172
Policy_SD: 0.818
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 2.1e+05
VF_0_ExplainedVarNew: 0.997
VF_0_ExplainedVarOld: 0.996
VF_0_Loss : 0.0608


ADV1:  -0.001266316980239654 0.01803788405118806 0.19477547867343845 -0.11813293674973496
ADV2:  0.013832129273053969 0.805078124267821 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0399   0.0096   0.0624   0.0969   0.0608   0.0176
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0015   0.0070   1.9276   0.8688   0.6518
***** Episode 1085, Mean R = 160.2  Std R = 12.7  Min R = 124.8
PolicyLoss: -0.00354
Policy_Beta: 0.1
Policy_Entropy: 0.106
Policy_KL: 0.00213
Policy_SD: 0.81
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 2.16e+05
VF_0_ExplainedVarNew: 0.997
VF_0_ExplainedVarOld: 0.997
VF_0_Loss : 0.138


ADV1:  0.0016597144359176952 0.01974210079479043 0.10845658928155899 

ADV1:  -0.0009555490330943059 0.0208602760024073 0.06596150722862985 -0.2793813627105032
ADV2:  0.036560542615320955 0.7210503171642495 2.870954955287459 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0519   0.0163   0.0843   0.1004   0.0637   0.0212
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0053   0.0021   0.0103   1.9276   0.8688   0.6518
***** Episode 1364, Mean R = 164.2  Std R = 15.6  Min R = 132.8
PolicyLoss: -0.0346
Policy_Beta: 0.1
Policy_Entropy: 0.113
Policy_KL: 0.00109
Policy_SD: 0.814
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 2.69e+05
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.189


ADV1:  0.002194414605667066 0.02209832264071601 0.14103706181049347 -0.31263151149926494
ADV2:  0.008284634033468146 0.7763651202605404 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0305   0.0053   0.0394   0.1004   0.0637   0.0212
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0015   0.0073   1.9276   0.8688   0

ADV1:  0.0013944844284993457 0.01775098128457635 0.15256105363368988 -0.24572389175037795
ADV2:  0.013148194341581095 0.6803346292623939 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0263   0.0085   0.0416   0.1224   0.0637   0.0280
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0011   0.0055   1.9276   0.8688   0.6518
***** Episode 1643, Mean R = 156.0  Std R = 15.3  Min R = 121.9
PolicyLoss: -0.0343
Policy_Beta: 0.1
Policy_Entropy: 0.12
Policy_KL: 0.001
Policy_SD: 0.824
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 3.23e+05
VF_0_ExplainedVarNew: 0.997
VF_0_ExplainedVarOld: 0.997
VF_0_Loss : 0.163


ADV1:  0.0003206919965921102 0.015034355640864831 0.06844359636306763 -0.21564853354306923
ADV2:  0.019488726560910594 0.6794414206670628 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0291   0.0069   0.0372   0.1224   0.0637   0.0280
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0008   0.0042   1.9276   0.8688   0.6518
***** Ep

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0036   0.0016   0.0083   1.9276   0.8688   0.6518
***** Episode 1922, Mean R = 159.5  Std R = 13.3  Min R = 131.2
PolicyLoss: -0.0208
Policy_Beta: 0.1
Policy_Entropy: 0.123
Policy_KL: 0.000944
Policy_SD: 0.833
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 3.77e+05
VF_0_ExplainedVarNew: 0.998
VF_0_ExplainedVarOld: 0.998
VF_0_Loss : 0.119


ADV1:  0.0038989388421520697 0.016177414469558567 0.1550302505493164 -0.2597545385360718
ADV2:  -0.05323495813303573 0.8105472135137489 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0392   0.0112   0.0702   0.1224   0.0637   0.0280
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0093   0.0040   0.0193   1.9276   0.8688   0.6518
***** Episode 1953, Mean R = 156.5  Std R = 10.4  Min R = 131.8
PolicyLoss: 0.00724
Policy_Beta: 0.1
Policy_Entropy: 0.128
Policy_KL: 0.00142
Policy_SD: 0.84
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 3.83e+05
VF_0_ExplainedVarNew: 0.998
VF_0_Explained

***** Episode 2201, Mean R = 153.2  Std R = 14.6  Min R = 131.4
PolicyLoss: -0.0368
Policy_Beta: 0.1
Policy_Entropy: 0.127
Policy_KL: 0.00102
Policy_SD: 0.827
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 4.32e+05
VF_0_ExplainedVarNew: 0.997
VF_0_ExplainedVarOld: 0.997
VF_0_Loss : 0.122


ADV1:  0.002461875245475452 0.012500585522234776 0.09704551100730896 -0.10099421441555023
ADV2:  -0.02615357003956752 0.848561727721759 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0708   0.0344   0.1642   0.1642   0.0708   0.0344
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0018   0.0084   1.9276   0.8688   0.6518
***** Episode 2232, Mean R = 155.0  Std R = 14.0  Min R = 132.8
PolicyLoss: -0.00756
Policy_Beta: 0.1
Policy_Entropy: 0.125
Policy_KL: 0.00189
Policy_SD: 0.829
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 4.38e+05
VF_0_ExplainedVarNew: 0.999
VF_0_ExplainedVarOld: 0.999
VF_0_Loss : 0.141


ADV1:  -0.0004419272666538574 0.014372398774313781 0.11347874999046326 

ADV1:  -0.0013278821058174224 0.017975474859223274 0.11866986751556396 -0.33409715442591037
ADV2:  0.04294270348863906 0.6266787130015311 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0169   0.0043   0.0217   0.1642   0.0708   0.0344
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0015   0.0067   1.9276   0.8688   0.6518
***** Episode 2511, Mean R = 157.4  Std R = 15.3  Min R = 131.6
PolicyLoss: -0.0391
Policy_Beta: 0.1
Policy_Entropy: 0.125
Policy_KL: 0.00106
Policy_SD: 0.829
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 4.91e+05
VF_0_ExplainedVarNew: 0.997
VF_0_ExplainedVarOld: 0.997
VF_0_Loss : 0.117


ADV1:  0.0010837886079535864 0.013300526709502658 0.09299200773239136 -0.13153166575969977
ADV2:  0.02335936125288548 0.7092300024657857 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0373   0.0100   0.0526   0.1642   0.0708   0.0344
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0013   0.0055   1.9276   0.8688   0.6518
***** Ep

***** Episode 2790, Mean R = 163.0  Std R = 18.0  Min R = 127.2
PolicyLoss: -0.0448
Policy_Beta: 0.1
Policy_Entropy: 0.12
Policy_KL: 0.00123
Policy_SD: 0.827
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 5.45e+05
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.184


ADV1:  -0.002012529907103137 0.014631645882461956 0.094013512134552 -0.16362567408711914
ADV2:  0.05253889428925282 0.7193829232680247 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0229   0.0038   0.0275   0.1642   0.0708   0.0344
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0065   0.0033   0.0134   1.9276   0.8688   0.6518
***** Episode 2821, Mean R = 162.6  Std R = 14.1  Min R = 132.9
PolicyLoss: -0.0376
Policy_Beta: 0.1
Policy_Entropy: 0.122
Policy_KL: 0.0009
Policy_SD: 0.833
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 5.51e+05
VF_0_ExplainedVarNew: 0.998
VF_0_ExplainedVarOld: 0.998
VF_0_Loss : 0.142


ADV1:  0.005795788445985651 0.0156515167838726 0.059161546198470916 -0.0955

***** Episode 3100, Mean R = 161.4  Std R = 14.3  Min R = 131.6
PolicyLoss: -0.0353
Policy_Beta: 0.1
Policy_Entropy: 0.125
Policy_KL: 0.00106
Policy_SD: 0.829
Policy_lr_mult: 1
Steps: 6.00e+03
TotalSteps: 6.05e+05
VF_0_ExplainedVarNew: 0.998
VF_0_ExplainedVarOld: 0.998
VF_0_Loss : 0.166


ADV1:  0.0012850602214695027 0.011169666057543886 0.06541328132152557 -0.08467894742205728
ADV2:  0.0134743722504124 0.8534653057003618 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0809   0.0550   0.2556   0.2556   0.0987   0.0550
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0018   0.0073   1.9276   0.8688   0.6518
***** Episode 3131, Mean R = 153.7  Std R = 12.4  Min R = 127.4
PolicyLoss: -0.0255
Policy_Beta: 0.1
Policy_Entropy: 0.126
Policy_KL: 0.00189
Policy_SD: 0.829
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 6.11e+05
VF_0_ExplainedVarNew: 0.999
VF_0_ExplainedVarOld: 0.999
VF_0_Loss : 0.0691


ADV1:  -0.0006664160772658993 0.011157312631628275 0.085429675877094

***** Episode 3410, Mean R = 158.3  Std R = 13.3  Min R = 133.6
PolicyLoss: -0.0458
Policy_Beta: 0.1
Policy_Entropy: 0.126
Policy_KL: 0.0014
Policy_SD: 0.827
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 6.65e+05
VF_0_ExplainedVarNew: 0.998
VF_0_ExplainedVarOld: 0.998
VF_0_Loss : 0.188


ADV1:  -0.0012159949347550715 0.013401786834469772 0.08780299127101898 -0.12978936970713795
ADV2:  0.03879315231982517 0.7651494432777948 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1044   0.0807   0.3339   0.3339   0.1074   0.0807
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0010   0.0051   1.9276   0.8688   0.6518
***** Episode 3441, Mean R = 161.2  Std R = 15.5  Min R = 133.6
PolicyLoss: -0.0306
Policy_Beta: 0.1
Policy_Entropy: 0.128
Policy_KL: 0.0015
Policy_SD: 0.835
Policy_lr_mult: 1
Steps: 6.00e+03
TotalSteps: 6.71e+05
VF_0_ExplainedVarNew: 0.998
VF_0_ExplainedVarOld: 0.998
VF_0_Loss : 0.142


ADV1:  0.0007568424125593315 0.013984955402577695 0.09766888618469238

theta_cv |    0.02 |    0.02 |    0.00 |    0.16
steps    |  192.80 |    6.57 |  179.00 |  214.00
***** Episode 3720, Mean R = 161.3  Std R = 14.8  Min R = 128.8
PolicyLoss: -0.0357
Policy_Beta: 0.1
Policy_Entropy: 0.126
Policy_KL: 0.00173
Policy_SD: 0.83
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 7.25e+05
VF_0_ExplainedVarNew: 0.998
VF_0_ExplainedVarOld: 0.998
VF_0_Loss : 0.21


ADV1:  0.001085051149615238 0.013406095726101868 0.06455785036087036 -0.11270269751548767
ADV2:  0.013474483473068994 0.8092821080734088 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0626   0.0182   0.1081   0.3516   0.1195   0.0866
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0009   0.0037   1.9276   0.8688   0.6518
***** Episode 3751, Mean R = 155.2  Std R = 17.5  Min R = 125.8
PolicyLoss: -0.0282
Policy_Beta: 0.1
Policy_Entropy: 0.13
Policy_KL: 0.00147
Policy_SD: 0.837
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 7.31e+05
VF_0_ExplainedVarNew: 0.998
VF_0_ExplainedVarOld: 0.

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |   -0.01   -0.11 |    0.27    1.73 |   -0.60   -3.14 |    0.59    3.14
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  161.23 |   15.36 |  125.81 |  197.19
tracking_error |  0.0488 |  0.1149 |  0.0000 |  1.3109
optflow_error |  0.1116 |  0.5586 |  0.0000 | 33.9836
pixel_icoords |   -0.27   -0.14 |    6.70    6.92 |  -14.71  -16.26 |   15.55   16.68
theta_cv |    0.02 |    0.02 |    0.00 |    0.19
steps    |  193.39 |    7.19 |  180.00 |  220.00
***** Episode 4030, Mean R = 166.2  Std R = 15.5  Min R = 130.7
PolicyLoss: -0.0294
Policy_Beta: 0.1
Policy_Entropy: 0.123
Policy_KL: 0.002

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.02   -0.05 |    0.26    1.75 |   -0.66   -3.13 |    0.61    3.10
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  161.19 |   14.39 |  131.55 |  197.61
tracking_error |  0.0473 |  0.1096 |  0.0000 |  1.2548
optflow_error |  0.1073 |  0.5329 |  0.0000 | 29.5929
pixel_icoords |    0.79    0.53 |    6.54    6.52 |  -17.45  -16.70 |   15.38   16.84
theta_cv |    0.03 |    0.03 |    0.00 |    0.24
steps    |  192.93 |    6.79 |  180.00 |  212.00
***** Episode 4340, Mean R = 154.9  Std R = 12.2  Min R = 135.9
PolicyLoss: -0.019
Policy_Beta: 0.1
Policy_Entropy: 0.14
Policy_KL: 0.00132

theta_cv |    0.03 |    0.03 |    0.00 |    0.25
steps    |  193.28 |    7.28 |  180.00 |  224.00
***** Episode 4650, Mean R = 160.5  Std R = 15.3  Min R = 137.2
PolicyLoss: -0.0328
Policy_Beta: 0.1
Policy_Entropy: 0.137
Policy_KL: 0.00135
Policy_SD: 0.826
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 9.05e+05
VF_0_ExplainedVarNew: 0.998
VF_0_ExplainedVarOld: 0.998
VF_0_Loss : 0.185


ADV1:  -2.169726912813456e-05 0.010637812824166617 0.06865459680557251 -0.09785416722297668
ADV2:  0.008297067958018276 0.809842507567293 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2490   0.1885   0.8306   0.9396   0.3634   0.1885
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0004   0.0018   1.9276   0.8688   0.6518
***** Episode 4681, Mean R = 163.3  Std R = 14.3  Min R = 140.7
PolicyLoss: -0.0115
Policy_Beta: 0.1
Policy_Entropy: 0.138
Policy_KL: 0.00136
Policy_SD: 0.835
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 9.11e+05
VF_0_ExplainedVarNew: 0.999
VF_0_ExplainedVarOld

pixel_icoords |    0.02    0.29 |    6.68    6.71 |  -15.94  -16.63 |   15.82   15.17
theta_cv |    0.03 |    0.04 |    0.00 |    0.30
steps    |  193.24 |    7.24 |  178.00 |  218.00
***** Episode 4960, Mean R = 161.9  Std R = 15.1  Min R = 126.2
PolicyLoss: -0.0261
Policy_Beta: 0.1
Policy_Entropy: 0.147
Policy_KL: 0.00084
Policy_SD: 0.844
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 9.64e+05
VF_0_ExplainedVarNew: 0.998
VF_0_ExplainedVarOld: 0.998
VF_0_Loss : 0.0713


ADV1:  0.0011227485094603602 0.010981076777090745 0.08406089647605669 -0.07508615105961025
ADV2:  -0.014833909104900551 0.8253123096046877 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1642   0.0933   0.4563   0.9396   0.3634   0.1928
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0008   0.0042   1.9276   0.8688   0.6518
***** Episode 4991, Mean R = 164.2  Std R = 14.3  Min R = 142.8
PolicyLoss: -0.0064
Policy_Beta: 0.1
Policy_Entropy: 0.148
Policy_KL: 0.00138
Policy_SD: 0.845
Policy_lr_mu

***** Episode 5270, Mean R = 167.7  Std R = 13.5  Min R = 132.3
PolicyLoss: -0.0437
Policy_Beta: 0.1
Policy_Entropy: 0.149
Policy_KL: 0.00112
Policy_SD: 0.849
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 1.02e+06
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.149


Dynamics: Max Disturbance (m/s^2):  [1.58763614e-12 3.18466002e-13 2.13438271e-12] 2.679104074489431e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.45084683e-13 2.68890320e-12] 3.065220687476527e-12
ADV1:  0.0003924469180548513 0.025780669607896694 0.08085315730408593 -0.5567405417143501
ADV2:  0.03042463823941314 0.6037825533879275 2.886330586585859 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0832   0.0346   0.1731   1.1234   0.5186   0.2403
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0015   0.0073   1.9276   0.8688   0.6518
***** Episode 5301, Mean R = 165.6  Std R = 14.9  Min R = 132.8
PolicyLoss: -0.0393
Policy_Beta: 0.1
Policy_Entropy: 0.15
Policy_KL: 0.

theta_cv |    0.04 |    0.05 |    0.00 |    0.36
steps    |  193.62 |    7.42 |  180.00 |  219.00
***** Episode 5580, Mean R = 170.0  Std R = 16.4  Min R = 136.3
PolicyLoss: -0.049
Policy_Beta: 0.1
Policy_Entropy: 0.156
Policy_KL: 0.00133
Policy_SD: 0.854
Policy_lr_mult: 1
Steps: 5.92e+03
TotalSteps: 1.08e+06
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.104


Dynamics: Max Disturbance (m/s^2):  [1.58763614e-12 3.18466002e-13 2.13438271e-12] 2.679104074489431e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.45084683e-13 2.68890320e-12] 3.065220687476527e-12
ADV1:  0.0027950154849163476 0.026403597985441108 0.12679061237542488 -0.43485765851813885
ADV2:  0.007860812051688967 0.6562383106251677 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2962   0.1460   0.6357   1.6099   0.7525   0.4045
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0078   0.0034   0.0163   1.9276   0.8688   0.6518
***** Episode 5611, Mean R = 171.8  Std R = 13.6  

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |   -0.03    0.02 |    0.27    1.74 |   -0.65   -3.13 |    0.56    3.13
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  170.98 |   14.75 |  130.70 |  207.76
tracking_error |  0.0321 |  0.0791 |  0.0000 |  1.1995
optflow_error |  0.1035 |  0.7418 |  0.0000 | 36.8217
pixel_icoords |   -0.65    0.30 |    6.52    6.50 |  -17.18  -17.10 |   15.55   15.54
theta_cv |    0.04 |    0.05 |    0.00 |    0.37
steps    |  193.09 |    7.39 |  179.00 |  223.00
***** Episode 5890, Mean R = 176.6  Std R = 14.7  Min R = 144.1
PolicyLoss: -0.0458
Policy_Beta: 0.1
Policy_Entropy: 0.158
Policy_KL: 0.001

tracking_error |  0.0305 |  0.0747 |  0.0000 |  1.2851
optflow_error |  0.1023 |  0.7368 |  0.0000 | 33.0596
pixel_icoords |   -0.31   -0.67 |    6.71    6.73 |  -14.03  -16.69 |   16.77   15.54
theta_cv |    0.05 |    0.06 |    0.00 |    0.38
steps    |  193.07 |    7.34 |  179.00 |  218.00
***** Episode 6200, Mean R = 177.5  Std R = 13.8  Min R = 144.7
PolicyLoss: -0.0486
Policy_Beta: 0.1
Policy_Entropy: 0.172
Policy_KL: 0.000928
Policy_SD: 0.855
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 1.2e+06
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.208


Dynamics: Max Disturbance (m/s^2):  [1.58763614e-12 3.18466002e-13 2.13438271e-12] 2.679104074489431e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.45084683e-13 2.68890320e-12] 3.065220687476527e-12
ADV1:  0.005735294127564524 0.027005422539104117 0.12984956837117145 -0.28761864554256344
ADV2:  -0.009071458738347462 0.7211364456700748 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3769   

w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  175.19 |   13.79 |  142.22 |  206.06
tracking_error |  0.0273 |  0.0693 |  0.0000 |  1.2189
optflow_error |  0.0976 |  0.7820 |  0.0000 | 32.3249
pixel_icoords |    0.13   -0.76 |    6.80    6.88 |  -14.93  -17.38 |   14.45   15.98
theta_cv |    0.05 |    0.06 |    0.00 |    0.36
steps    |  193.32 |    7.42 |  179.00 |  216.00
***** Episode 6510, Mean R = 177.7  Std R = 13.4  Min R = 148.3
PolicyLoss: -0.0302
Policy_Beta: 0.1
Policy_Entropy: 0.174
Policy_KL: 0.00171
Policy_SD: 0.856
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 1.26e+06
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.232


ADV1:  0.0035471966784850276 0.035473953578

tracking_error |  0.0280 |  0.0710 |  0.0000 |  1.1823
optflow_error |  0.1015 |  0.7608 |  0.0000 | 30.1733
pixel_icoords |    0.45   -0.70 |    6.89    6.50 |  -17.13  -16.16 |   15.21   15.83
theta_cv |    0.06 |    0.07 |    0.00 |    0.45
steps    |  193.11 |    6.99 |  180.00 |  219.00
***** Episode 6820, Mean R = 175.8  Std R = 10.6  Min R = 145.1
PolicyLoss: -0.0377
Policy_Beta: 0.1
Policy_Entropy: 0.189
Policy_KL: 0.000935
Policy_SD: 0.873
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 1.32e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.124


ADV1:  0.0002032316826666602 0.03578849271728602 0.21330079660357304 -0.6070586088118362
ADV2:  0.024622716465154394 0.6959193292596061 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4887   0.2913   1.2696   3.0822   1.4110   0.6049
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0094   0.0048   0.0220   1.9276   0.8688   0.6518
***** Episode 6851, Mean R = 174.9  Std R = 11.0  Min R = 145.7
P

theta_cv |    0.06 |    0.07 |    0.00 |    0.39
steps    |  193.34 |    7.24 |  179.00 |  214.00
***** Episode 7130, Mean R = 180.6  Std R = 12.6  Min R = 132.2
PolicyLoss: -0.0351
Policy_Beta: 0.1
Policy_Entropy: 0.188
Policy_KL: 0.00113
Policy_SD: 0.865
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 1.38e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.227


ADV1:  -0.0028910145838743078 0.03278697310942381 0.22931084036827087 -0.3451227684184014
ADV2:  0.04376005239335653 0.7708226307484897 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4985   0.2365   1.2894   3.0822   1.4110   0.6049
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0117   0.0059   0.0255   1.9276   0.8688   0.6518
***** Episode 7161, Mean R = 177.8  Std R = 11.2  Min R = 149.4
PolicyLoss: -0.0304
Policy_Beta: 0.1
Policy_Entropy: 0.196
Policy_KL: 0.00179
Policy_SD: 0.871
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 1.39e+06
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOl

a_f      |   -0.00    0.06 |    0.26    1.79 |   -0.61   -3.11 |    0.58    3.08
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  177.19 |   11.66 |  138.06 |  207.91
tracking_error |  0.0257 |  0.0692 |  0.0000 |  1.2819
optflow_error |  0.1054 |  0.8197 |  0.0000 | 37.8331
pixel_icoords |   -0.30   -0.39 |    6.86    6.74 |  -16.55  -16.51 |   15.60   17.49
theta_cv |    0.06 |    0.07 |    0.00 |    0.41
steps    |  193.23 |    7.10 |  179.00 |  216.00
***** Episode 7440, Mean R = 176.6  Std R = 9.6  Min R = 155.6
PolicyLoss: -0.0286
Policy_Beta: 0.1
Policy_Entropy: 0.2
Policy_KL: 0.000686
Policy_SD: 0.88
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 1.44e+06
VF_0_ExplainedVarNew: 0.984
VF_0_Explaine

theta_cv |    0.05 |    0.07 |    0.00 |    0.38
steps    |  192.72 |    7.26 |  180.00 |  221.00
***** Episode 7750, Mean R = 172.2  Std R = 12.7  Min R = 144.6
PolicyLoss: -0.0287
Policy_Beta: 0.1
Policy_Entropy: 0.203
Policy_KL: 0.00189
Policy_SD: 0.888
Policy_lr_mult: 1
Steps: 5.86e+03
TotalSteps: 1.5e+06
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.0849


Dynamics: Max Disturbance (m/s^2):  [1.58763614e-12 3.18466002e-13 2.13438271e-12] 2.679104074489431e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.45084683e-13 2.68890320e-12] 3.065220687476527e-12
ADV1:  -0.001425588004528843 0.027088260207114804 0.1296613885787889 -0.2606607112656284
ADV2:  0.024557979663850656 0.8284056145321023 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4224   0.1899   0.9841   3.0822   1.4110   0.6049
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0096   0.0044   0.0182   1.9276   0.8688   0.6518
***** Episode 7781, Mean R = 176.5  Std R = 13.1  M

optflow_error |  0.1045 |  0.8279 |  0.0000 | 37.1966
pixel_icoords |   -0.03   -0.02 |    6.59    6.59 |  -14.45  -14.92 |   16.12   15.36
theta_cv |    0.06 |    0.07 |    0.00 |    0.45
steps    |  193.21 |    6.66 |  180.00 |  215.00
***** Episode 8060, Mean R = 176.0  Std R = 10.6  Min R = 152.1
PolicyLoss: -0.0303
Policy_Beta: 0.1
Policy_Entropy: 0.218
Policy_KL: 0.000892
Policy_SD: 0.89
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 1.56e+06
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.147


ADV1:  -0.002981667310144999 0.027649573799246933 0.11430257237066821 -0.2805329610349572
ADV2:  0.054005994277696576 0.7200784449972468 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2206   0.0910   0.3881   3.0822   1.4110   0.6049
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0100   0.0057   0.0242   1.9276   0.8688   0.6518
***** Episode 8091, Mean R = 182.0  Std R = 10.0  Min R = 150.5
PolicyLoss: -0.0388
Policy_Beta: 0.1
Policy_Entropy: 0.2

optflow_error |  0.1054 |  0.8444 |  0.0000 | 39.0802
pixel_icoords |    0.01   -0.43 |    6.91    6.49 |  -17.99  -15.67 |   16.67   14.52
theta_cv |    0.06 |    0.07 |    0.00 |    0.41
steps    |  193.02 |    7.36 |  179.00 |  219.00
***** Episode 8370, Mean R = 175.4  Std R = 11.8  Min R = 146.7
PolicyLoss: -0.0239
Policy_Beta: 0.1
Policy_Entropy: 0.214
Policy_KL: 0.000981
Policy_SD: 0.884
Policy_lr_mult: 1
Steps: 5.92e+03
TotalSteps: 1.62e+06
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.154


ADV1:  -7.001212917693188e-05 0.023303039235560787 0.1292998215137639 -0.1993681775858428
ADV2:  0.018288368813107178 0.8210549350095807 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3592   0.1738   0.7605   3.0822   1.4110   0.6049
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0036   0.0017   0.0087   1.9276   0.8688   0.6518
***** Episode 8401, Mean R = 176.7  Std R = 10.3  Min R = 145.6
PolicyLoss: -0.0223
Policy_Beta: 0.1
Policy_Entropy: 0.2

***** Episode 8680, Mean R = 178.5  Std R = 12.2  Min R = 133.1
PolicyLoss: -0.0299
Policy_Beta: 0.1
Policy_Entropy: 0.211
Policy_KL: 0.000833
Policy_SD: 0.88
Policy_lr_mult: 1
Steps: 5.92e+03
TotalSteps: 1.68e+06
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.149


ADV1:  0.0005076703590514971 0.025157258373284214 0.1474841994011872 -0.21689477434422755
ADV2:  0.017293391950988522 0.789299281201456 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7695   0.3548   1.8965   3.0822   1.4110   0.6049
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0061   0.0024   0.0104   1.9276   0.8688   0.6518
***** Episode 8711, Mean R = 177.6  Std R = 9.5  Min R = 156.2
PolicyLoss: -0.0271
Policy_Beta: 0.1
Policy_Entropy: 0.215
Policy_KL: 0.00128
Policy_SD: 0.885
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 1.69e+06
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.148


ADV1:  -0.002141542740439299 0.0290295522405217 0.11468166698278259 -0.3

optflow_error |  0.1019 |  0.7980 |  0.0000 | 29.3021
pixel_icoords |    0.40    0.35 |    6.30    6.29 |  -17.88  -13.99 |   16.43   15.49
theta_cv |    0.06 |    0.07 |    0.00 |    0.41
steps    |  192.58 |    7.09 |  179.00 |  214.00
***** Episode 8990, Mean R = 178.1  Std R = 8.2  Min R = 156.1
PolicyLoss: -0.0366
Policy_Beta: 0.1
Policy_Entropy: 0.221
Policy_KL: 0.00122
Policy_SD: 0.891
Policy_lr_mult: 1
Steps: 5.92e+03
TotalSteps: 1.74e+06
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.0763


ADV1:  0.00028424195608103847 0.02327626977714904 0.09887813320808092 -0.4227552594989432
ADV2:  0.02164913774449707 0.646163616202098 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5371   0.2250   1.0607   3.0822   1.4110   0.6049
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0007   0.0038   1.9276   0.8688   0.6518
***** Episode 9021, Mean R = 181.0  Std R = 11.7  Min R = 157.1
PolicyLoss: -0.0338
Policy_Beta: 0.1
Policy_Entropy: 0.227


theta_cv |    0.06 |    0.07 |    0.00 |    0.42
steps    |  193.29 |    6.58 |  180.00 |  211.00
***** Episode 9300, Mean R = 177.8  Std R = 13.0  Min R = 153.5
PolicyLoss: -0.0323
Policy_Beta: 0.1
Policy_Entropy: 0.231
Policy_KL: 0.00132
Policy_SD: 0.896
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 1.8e+06
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.128


ADV1:  -0.0009366596624761449 0.01790501525135979 0.08770783532920245 -0.1627477412048467
ADV2:  0.029643451039557844 0.7845029041615273 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4869   0.1568   0.6906   3.0822   1.4110   0.6049
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0014   0.0061   1.9276   0.8688   0.6518
***** Episode 9331, Mean R = 178.0  Std R = 7.9  Min R = 156.8
PolicyLoss: -0.0289
Policy_Beta: 0.1
Policy_Entropy: 0.231
Policy_KL: 0.00101
Policy_SD: 0.897
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 1.81e+06
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld:

optflow_error |  0.1024 |  0.8054 |  0.0000 | 30.9086
pixel_icoords |    0.43    0.19 |    6.91    6.76 |  -15.29  -17.82 |   16.21   15.14
theta_cv |    0.06 |    0.07 |    0.00 |    0.48
steps    |  193.08 |    6.74 |  179.00 |  214.00
***** Episode 9610, Mean R = 179.0  Std R = 11.5  Min R = 149.1
PolicyLoss: -0.0267
Policy_Beta: 0.1
Policy_Entropy: 0.235
Policy_KL: 0.00112
Policy_SD: 0.9
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 1.86e+06
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.123


ADV1:  0.0007180862439627194 0.01980062236396064 0.15891873606871665 -0.1363880229429537
ADV2:  -0.008255506475896934 0.7727066896395879 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2179   0.1369   0.5055   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0048   0.0026   0.0098   1.9276   0.8688   0.6518
***** Episode 9641, Mean R = 179.9  Std R = 9.3  Min R = 154.2
PolicyLoss: 0.000659
Policy_Beta: 0.1
Policy_Entropy: 0.239

optflow_error |  0.1047 |  0.8242 |  0.0000 | 36.9707
pixel_icoords |    0.30   -0.31 |    6.75    6.39 |  -16.54  -16.85 |   15.71   14.33
theta_cv |    0.06 |    0.07 |    0.00 |    0.40
steps    |  192.75 |    6.90 |  179.00 |  216.00
***** Episode 9920, Mean R = 178.6  Std R = 10.6  Min R = 141.8
PolicyLoss: -0.0427
Policy_Beta: 0.1
Policy_Entropy: 0.231
Policy_KL: 0.00169
Policy_SD: 0.898
Policy_lr_mult: 1
Steps: 5.93e+03
TotalSteps: 1.92e+06
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.128


ADV1:  0.0021192024155816027 0.018826846243859947 0.14492687582969666 -0.13925143716627708
ADV2:  -0.011143036460630036 0.8398313172103048 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4236   0.2231   0.9692   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0068   0.0034   0.0134   1.9276   0.8688   0.6518
***** Episode 9951, Mean R = 178.4  Std R = 8.9  Min R = 153.2
PolicyLoss: -0.00302
Policy_Beta: 0.1
Policy_Entropy: 0

theta_cv |    0.06 |    0.07 |    0.00 |    0.41
steps    |  193.23 |    7.40 |  179.00 |  217.00
***** Episode 10230, Mean R = 180.1  Std R = 10.4  Min R = 155.1
PolicyLoss: -0.00887
Policy_Beta: 0.1
Policy_Entropy: 0.237
Policy_KL: 0.00131
Policy_SD: 0.896
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 1.98e+06
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.182


ADV1:  -0.0007165203212035175 0.01897296806811996 0.08309557486018437 -0.27241358160972595
ADV2:  0.037150631594543515 0.7126252095729249 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5085   0.2378   1.1528   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0012   0.0056   1.9276   0.8688   0.6518
***** Episode 10261, Mean R = 180.5  Std R = 7.2  Min R = 158.9
PolicyLoss: -0.0383
Policy_Beta: 0.1
Policy_Entropy: 0.23
Policy_KL: 0.00102
Policy_SD: 0.897
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 1.99e+06
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVa

theta_cv |    0.06 |    0.07 |    0.00 |    0.43
steps    |  193.43 |    7.21 |  179.00 |  223.00
***** Episode 10540, Mean R = 181.6  Std R = 11.0  Min R = 147.6
PolicyLoss: -0.0475
Policy_Beta: 0.1
Policy_Entropy: 0.23
Policy_KL: 0.0017
Policy_SD: 0.891
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 2.04e+06
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.188


Dynamics: Max Disturbance (m/s^2):  [1.58851172e-12 3.18466002e-13 2.13438271e-12] 2.6796230376245945e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.47800953e-13 2.68899340e-12] 3.065518181626573e-12
ADV1:  0.00411936335908233 0.01747990678901601 0.08441686666383763 -0.15607989734359107
ADV2:  -0.023873624917160004 0.824737379667697 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2662   0.1544   0.7378   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0123   0.0069   0.0278   1.9276   0.8688   0.6518
***** Episode 10571, Mean R = 181.2  Std R = 7.8  Mi

optflow_error |  0.1077 |  0.8196 |  0.0000 | 28.1296
pixel_icoords |   -0.66   -0.26 |    6.64    6.40 |  -14.62  -17.20 |   16.52   16.53
theta_cv |    0.06 |    0.07 |    0.00 |    0.42
steps    |  192.72 |    7.31 |  180.00 |  219.00
***** Episode 10850, Mean R = 182.6  Std R = 7.4  Min R = 165.9
PolicyLoss: -0.0403
Policy_Beta: 0.1
Policy_Entropy: 0.223
Policy_KL: 0.00078
Policy_SD: 0.883
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 2.1e+06
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.186


ADV1:  -0.001919534747045781 0.018151980093214514 0.07371667708290042 -0.22911997839624088
ADV2:  0.05107306717359072 0.7333680794429073 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4002   0.2101   1.0817   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0040   0.0020   0.0082   1.9276   0.8688   0.6518
***** Episode 10881, Mean R = 182.4  Std R = 10.2  Min R = 163.2
PolicyLoss: -0.0288
Policy_Beta: 0.1
Policy_Entropy: 0.22

optflow_error |  0.1045 |  0.7794 |  0.0000 | 23.6651
pixel_icoords |   -0.17   -0.21 |    6.67    6.73 |  -16.52  -16.45 |   15.36   16.58
theta_cv |    0.06 |    0.07 |    0.00 |    0.43
steps    |  192.91 |    7.40 |  179.00 |  215.00
***** Episode 11160, Mean R = 181.9  Std R = 8.4  Min R = 154.8
PolicyLoss: -0.0315
Policy_Beta: 0.1
Policy_Entropy: 0.227
Policy_KL: 0.00133
Policy_SD: 0.889
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 2.16e+06
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.14


ADV1:  -0.0006006179240705794 0.016316403711404873 0.12457789298610983 -0.24341764186469272
ADV2:  0.026711768218308684 0.7111931948051369 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1203   0.0611   0.2620   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0008   0.0037   1.9276   0.8688   0.6518
***** Episode 11191, Mean R = 180.4  Std R = 8.8  Min R = 154.8
PolicyLoss: -0.0265
Policy_Beta: 0.1
Policy_Entropy: 0.

optflow_error |  0.1107 |  0.8619 |  0.0000 | 32.1043
pixel_icoords |    0.41    0.37 |    6.66    6.61 |  -16.25  -16.87 |   15.30   15.16
theta_cv |    0.07 |    0.08 |    0.00 |    0.41
steps    |  193.08 |    6.83 |  179.00 |  217.00
***** Episode 11470, Mean R = 180.1  Std R = 7.3  Min R = 163.8
PolicyLoss: -0.0345
Policy_Beta: 0.1
Policy_Entropy: 0.221
Policy_KL: 0.000935
Policy_SD: 0.887
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 2.22e+06
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.126


ADV1:  0.0025908659542142037 0.018104589521007235 0.12115297438458855 -0.2721187460810943
ADV2:  0.012688177049545531 0.7301909723869866 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1981   0.0870   0.3581   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0054   0.0028   0.0115   1.9276   0.8688   0.6518
***** Episode 11501, Mean R = 180.4  Std R = 6.8  Min R = 157.6
PolicyLoss: -0.0203
Policy_Beta: 0.1
Policy_Entropy: 0.

theta_cv |    0.06 |    0.07 |    0.00 |    0.39
steps    |  193.27 |    6.56 |  180.00 |  212.00
***** Episode 11780, Mean R = 183.0  Std R = 7.1  Min R = 166.3
PolicyLoss: -0.0325
Policy_Beta: 0.1
Policy_Entropy: 0.22
Policy_KL: 0.00128
Policy_SD: 0.886
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 2.28e+06
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.187


Dynamics: Max Disturbance (m/s^2):  [1.58851172e-12 3.18466002e-13 2.13438271e-12] 2.6796230376245945e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.47800953e-13 2.68899340e-12] 3.065518181626573e-12
ADV1:  0.0010426020519563302 0.01521880901412782 0.09178203109372918 -0.17680574610750244
ADV2:  0.013068675175797117 0.7459920240445848 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2603   0.1645   0.7279   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0051   0.0025   0.0104   1.9276   0.8688   0.6518
***** Episode 11811, Mean R = 180.6  Std R = 9.2  

optflow_error |  0.1074 |  0.8208 |  0.0000 | 36.2004
pixel_icoords |   -0.14   -0.49 |    6.62    6.55 |  -16.22  -18.58 |   18.56   17.31
theta_cv |    0.06 |    0.07 |    0.00 |    0.41
steps    |  193.76 |    6.69 |  180.00 |  218.00
***** Episode 12090, Mean R = 177.5  Std R = 8.6  Min R = 164.2
PolicyLoss: -0.0277
Policy_Beta: 0.1
Policy_Entropy: 0.242
Policy_KL: 0.00146
Policy_SD: 0.901
Policy_lr_mult: 1
Steps: 5.9e+03
TotalSteps: 2.34e+06
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.134


ADV1:  -0.001844628203601289 0.015432579933832617 0.0776631803857355 -0.2073474288696816
ADV2:  0.0358606338467031 0.7893996844605985 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3703   0.2468   1.1824   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0066   0.0036   0.0141   1.9276   0.8688   0.6518
***** Episode 12121, Mean R = 181.2  Std R = 8.8  Min R = 155.4
PolicyLoss: -0.0183
Policy_Beta: 0.1
Policy_Entropy: 0.237
P

optflow_error |  0.1072 |  0.8408 |  0.0000 | 42.1693
pixel_icoords |    0.52    0.09 |    6.56    6.41 |  -14.62  -15.34 |   16.02   15.20
theta_cv |    0.06 |    0.07 |    0.00 |    0.42
steps    |  192.18 |    7.41 |  179.00 |  220.00
***** Episode 12400, Mean R = 179.2  Std R = 9.7  Min R = 153.8
PolicyLoss: -0.0228
Policy_Beta: 0.1
Policy_Entropy: 0.24
Policy_KL: 0.00149
Policy_SD: 0.89
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 2.4e+06
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.126


ADV1:  -0.0022801012542559202 0.0166686221740308 0.08086804768678235 -0.2714190334021871
ADV2:  0.03816557527614361 0.7451980971131912 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4073   0.2015   0.9367   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0020   0.0090   1.9276   0.8688   0.6518
***** Episode 12431, Mean R = 180.2  Std R = 6.6  Min R = 164.8
PolicyLoss: -0.0317
Policy_Beta: 0.1
Policy_Entropy: 0.234
Po

theta_cv |    0.06 |    0.07 |    0.00 |    0.36
steps    |  193.01 |    6.64 |  179.00 |  216.00
***** Episode 12710, Mean R = 183.2  Std R = 8.1  Min R = 166.4
PolicyLoss: -0.0345
Policy_Beta: 0.1
Policy_Entropy: 0.236
Policy_KL: 0.001
Policy_SD: 0.886
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 2.46e+06
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.222


ADV1:  0.0009541754866615363 0.014918938785637215 0.07591896295676193 -0.1770579486072159
ADV2:  0.022565930296033987 0.6688493170936374 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2654   0.1942   0.8680   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0007   0.0040   1.9276   0.8688   0.6518
***** Episode 12741, Mean R = 182.5  Std R = 6.4  Min R = 164.9
PolicyLoss: -0.0421
Policy_Beta: 0.1
Policy_Entropy: 0.234
Policy_KL: 0.00111
Policy_SD: 0.889
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 2.47e+06
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0

theta_cv |    0.06 |    0.07 |    0.00 |    0.37
steps    |  193.55 |    6.79 |  180.00 |  214.00
***** Episode 13020, Mean R = 178.9  Std R = 7.0  Min R = 165.6
PolicyLoss: -0.00436
Policy_Beta: 0.1
Policy_Entropy: 0.253
Policy_KL: 0.000894
Policy_SD: 0.897
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 2.52e+06
VF_0_ExplainedVarNew: 0.997
VF_0_ExplainedVarOld: 0.997
VF_0_Loss : 0.0923


Dynamics: Max Disturbance (m/s^2):  [1.58851172e-12 3.18466002e-13 2.13438271e-12] 2.6796230376245945e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.47800953e-13 2.68899340e-12] 3.065518181626573e-12
ADV1:  -0.0033753113950315333 0.011723332662129632 0.05898893875560096 -0.13331123136469814
ADV2:  0.039190526843287415 0.8440086159599594 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2271   0.1939   0.8310   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0058   0.0029   0.0119   1.9276   0.8688   0.6518
***** Episode 13051, Mean R = 177.0  Std R =

optflow_error |  0.1048 |  0.7959 |  0.0000 | 43.5935
pixel_icoords |   -0.12   -0.42 |    6.68    6.89 |  -14.66  -16.47 |   16.75   15.19
theta_cv |    0.07 |    0.07 |    0.00 |    0.37
steps    |  193.24 |    7.33 |  179.00 |  217.00
***** Episode 13330, Mean R = 180.2  Std R = 9.1  Min R = 165.1
PolicyLoss: 0.00499
Policy_Beta: 0.1
Policy_Entropy: 0.261
Policy_KL: 0.000866
Policy_SD: 0.89
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 2.58e+06
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.997
VF_0_Loss : 0.168


ADV1:  -0.0014677588728007028 0.012419438848276093 0.19371706247329712 -0.0985477799701302
ADV2:  0.01294760199716727 0.7692264790885605 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3110   0.1746   0.7189   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0013   0.0053   1.9276   0.8688   0.6518
***** Episode 13361, Mean R = 179.4  Std R = 8.6  Min R = 164.9
PolicyLoss: -0.00209
Policy_Beta: 0.1
Policy_Entropy: 0.256

***** Episode 13640, Mean R = 182.5  Std R = 8.2  Min R = 168.8
PolicyLoss: -0.0141
Policy_Beta: 0.1
Policy_Entropy: 0.259
Policy_KL: 0.00133
Policy_SD: 0.895
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 2.64e+06
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.113


ADV1:  0.001037267452695256 0.016222660880926804 0.09088832444923432 -0.27775540584174485
ADV2:  0.023364144064463476 0.6694975237478272 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5260   0.3266   1.3755   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0047   0.0025   0.0096   1.9276   0.8688   0.6518
***** Episode 13671, Mean R = 182.4  Std R = 7.7  Min R = 168.8
PolicyLoss: -0.0366
Policy_Beta: 0.1
Policy_Entropy: 0.256
Policy_KL: 0.00134
Policy_SD: 0.891
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 2.65e+06
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.115


ADV1:  -0.00014501183223964939 0.016105427881494574 0.07158230112685

***** Episode 13950, Mean R = 180.6  Std R = 7.6  Min R = 165.4
PolicyLoss: -0.00372
Policy_Beta: 0.1
Policy_Entropy: 0.271
Policy_KL: 0.00106
Policy_SD: 0.891
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 2.7e+06
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.145


ADV1:  -0.0032411743461917076 0.017850604921231382 0.08389238044990516 -0.20510836026890433
ADV2:  0.04289931230688699 0.758524983204329 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6098   0.3589   1.5091   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0093   0.0049   0.0196   1.9276   0.8688   0.6518
***** Episode 13981, Mean R = 180.0  Std R = 8.3  Min R = 166.2
PolicyLoss: -0.0382
Policy_Beta: 0.1
Policy_Entropy: 0.267
Policy_KL: 0.00199
Policy_SD: 0.9
Policy_lr_mult: 1
Steps: 5.92e+03
TotalSteps: 2.71e+06
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.0453


Dynamics: Max Disturbance (m/s^2):  [1.58851172e-12 3.18466002e-13 2.

optflow_error |  0.1023 |  0.8566 |  0.0000 | 38.6278
pixel_icoords |   -0.67   -0.35 |    7.03    6.28 |  -16.91  -14.81 |   16.47   15.98
theta_cv |    0.07 |    0.06 |    0.00 |    0.34
steps    |  192.80 |    7.05 |  179.00 |  214.00
***** Episode 14260, Mean R = 183.3  Std R = 7.2  Min R = 167.4
PolicyLoss: -0.0262
Policy_Beta: 0.1
Policy_Entropy: 0.259
Policy_KL: 0.00117
Policy_SD: 0.883
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 2.76e+06
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.146


ADV1:  0.0010527451217185561 0.015341882094337761 0.2162718027830124 -0.15882436256377297
ADV2:  -0.011511128283164774 0.7784772756317532 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5120   0.3536   1.9100   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0218   0.0129   0.0479   1.9276   0.8688   0.6518
***** Episode 14291, Mean R = 180.8  Std R = 9.0  Min R = 157.3
PolicyLoss: -0.00524
Policy_Beta: 0.1
Policy_Entropy: 0

optflow_error |  0.1031 |  0.8391 |  0.0000 | 28.7266
pixel_icoords |   -0.05   -0.00 |    7.22    7.20 |  -15.94  -17.18 |   16.42   17.13
theta_cv |    0.07 |    0.06 |    0.00 |    0.33
steps    |  194.35 |    7.25 |  180.00 |  219.00
***** Episode 14570, Mean R = 185.7  Std R = 8.4  Min R = 169.4
PolicyLoss: -0.046
Policy_Beta: 0.1
Policy_Entropy: 0.263
Policy_KL: 0.000823
Policy_SD: 0.888
Policy_lr_mult: 1
Steps: 6.06e+03
TotalSteps: 2.82e+06
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.18


ADV1:  -0.0013222689139350467 0.02323459070148879 0.1790972899466856 -0.3275721669738487
ADV2:  0.04653483728668619 0.6376810314282736 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5652   0.3384   1.4098   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0077   0.0036   0.0157   1.9276   0.8688   0.6518
***** Episode 14601, Mean R = 183.5  Std R = 11.4  Min R = 166.6
PolicyLoss: -0.0448
Policy_Beta: 0.1
Policy_Entropy: 0.26


theta_cv |    0.07 |    0.06 |    0.00 |    0.34
steps    |  192.99 |    7.29 |  179.00 |  214.00
***** Episode 14880, Mean R = 185.0  Std R = 9.3  Min R = 164.5
PolicyLoss: -0.0311
Policy_Beta: 0.1
Policy_Entropy: 0.26
Policy_KL: 0.00149
Policy_SD: 0.885
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 2.88e+06
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.249


ADV1:  0.0005570854042310019 0.014165487062775126 0.07707006212893872 -0.1508381388376292
ADV2:  0.013285985598661058 0.7293187866623554 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2004   0.0846   0.3707   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0032   0.0015   0.0063   1.9276   0.8688   0.6518
***** Episode 14911, Mean R = 183.2  Std R = 9.5  Min R = 166.0
PolicyLoss: -0.0279
Policy_Beta: 0.1
Policy_Entropy: 0.264
Policy_KL: 0.000727
Policy_SD: 0.886
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 2.89e+06
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarO

tracking_error |  0.0189 |  0.0597 |  0.0000 |  1.2304
optflow_error |  0.0984 |  0.8735 |  0.0000 | 37.4173
pixel_icoords |   -0.46   -0.10 |    6.76    6.79 |  -15.98  -15.92 |   15.16   15.82
theta_cv |    0.07 |    0.06 |    0.00 |    0.34
steps    |  193.55 |    7.12 |  179.00 |  216.00
***** Episode 15190, Mean R = 181.5  Std R = 8.3  Min R = 167.4
PolicyLoss: 0.0157
Policy_Beta: 0.1
Policy_Entropy: 0.277
Policy_KL: 0.00113
Policy_SD: 0.892
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 2.94e+06
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.996
VF_0_Loss : 0.126


Dynamics: Max Disturbance (m/s^2):  [1.58851172e-12 3.18466002e-13 2.13438271e-12] 2.6796230376245945e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.47800953e-13 2.68899340e-12] 3.065518181626573e-12
ADV1:  -0.0008189141598426986 0.011839383755873008 0.12631373318560196 -0.1536814046123418
ADV2:  0.020737334516428667 0.7526638149511509 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4932   

optflow_error |  0.0953 |  0.8244 |  0.0000 | 42.0874
pixel_icoords |    0.36    0.79 |    6.62    6.35 |  -16.33  -17.50 |   16.29   15.36
theta_cv |    0.07 |    0.06 |    0.00 |    0.35
steps    |  192.52 |    7.17 |  179.00 |  216.00
***** Episode 15500, Mean R = 185.1  Std R = 9.3  Min R = 162.1
PolicyLoss: -0.0327
Policy_Beta: 0.1
Policy_Entropy: 0.268
Policy_KL: 0.000746
Policy_SD: 0.89
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 3e+06
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.148


ADV1:  -0.00011241812962812284 0.020188259642592406 0.09497160151282524 -0.31566066716421026
ADV2:  0.028260824056830905 0.6460569769557986 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3290   0.2370   0.9728   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0013   0.0056   1.9276   0.8688   0.6518
***** Episode 15531, Mean R = 186.0  Std R = 9.2  Min R = 166.6
PolicyLoss: -0.0347
Policy_Beta: 0.1
Policy_Entropy: 0.27

***** Episode 15810, Mean R = 184.7  Std R = 7.5  Min R = 171.9
PolicyLoss: 0.00112
Policy_Beta: 0.1
Policy_Entropy: 0.28
Policy_KL: 0.00122
Policy_SD: 0.885
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 3.06e+06
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.191


ADV1:  -0.006698538347759698 0.026510773418378195 0.09378755837678909 -0.480307718305568
ADV2:  0.08841672753336234 0.6112043881420177 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4127   0.1838   0.8499   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0229   0.0132   0.0502   1.9276   0.8688   0.6518
***** Episode 15841, Mean R = 185.8  Std R = 8.4  Min R = 165.3
PolicyLoss: -0.0503
Policy_Beta: 0.1
Policy_Entropy: 0.261
Policy_KL: 0.00145
Policy_SD: 0.878
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 3.07e+06
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.185


ADV1:  -0.00238211703218403 0.026270880297564116 0.180780827999115 -0.4342

theta_cv |    0.07 |    0.06 |    0.00 |    0.33
steps    |  193.01 |    7.07 |  179.00 |  216.00
***** Episode 16120, Mean R = 187.8  Std R = 8.1  Min R = 172.4
PolicyLoss: -0.0279
Policy_Beta: 0.1
Policy_Entropy: 0.287
Policy_KL: 0.00128
Policy_SD: 0.887
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 3.12e+06
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.2


Dynamics: Max Disturbance (m/s^2):  [1.58925643e-12 3.18466002e-13 2.13438271e-12] 2.680064574915074e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.47800953e-13 2.68899340e-12] 3.065518181626573e-12
ADV1:  -0.00026031221527083676 0.022246677630536966 0.2396516054868698 -0.4340574040158621
ADV2:  0.015361838579220218 0.6201728842442595 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3214   0.1771   1.0153   3.0822   1.4110   0.6314
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0048   0.0021   0.0090   1.9276   0.8688   0.6518
***** Episode 16151, Mean R = 186.5  Std R = 9.9  M

***** Episode 16430, Mean R = 185.6  Std R = 9.6  Min R = 163.9
PolicyLoss: 0.000158
Policy_Beta: 0.1
Policy_Entropy: 0.272
Policy_KL: 0.00078
Policy_SD: 0.888
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 3.18e+06
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.165


ADV1:  -0.0007477391896555251 0.02388438149055083 0.11987086045496932 -0.3667122345180005
ADV2:  0.03716875067492015 0.6282489440284686 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4329   0.1933   0.9809   3.0822   1.4110   0.6445
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0012   0.0054   1.9276   0.8688   0.6518
***** Episode 16461, Mean R = 187.3  Std R = 11.1  Min R = 167.4
PolicyLoss: -0.0391
Policy_Beta: 0.1
Policy_Entropy: 0.275
Policy_KL: 0.00124
Policy_SD: 0.89
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 3.19e+06
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.164


ADV1:  -0.0028016224955831753 0.02581482104190556 0.17279439899243715 -

theta_cv |    0.06 |    0.05 |    0.00 |    0.32
steps    |  193.00 |    6.97 |  179.00 |  220.00
***** Episode 16740, Mean R = 187.3  Std R = 6.7  Min R = 177.6
PolicyLoss: -0.0154
Policy_Beta: 0.1
Policy_Entropy: 0.267
Policy_KL: 0.000883
Policy_SD: 0.894
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 3.24e+06
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.165


ADV1:  -0.0031292452874386385 0.026662362188375983 0.11258066762027297 -0.36091004002737237
ADV2:  0.0495548604910556 0.7124548644963036 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6767   0.3975   1.7813   3.0822   1.4110   0.6445
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0068   0.0036   0.0152   1.9276   0.8688   0.6518
***** Episode 16771, Mean R = 188.0  Std R = 8.5  Min R = 168.0
PolicyLoss: -0.0388
Policy_Beta: 0.1
Policy_Entropy: 0.258
Policy_KL: 0.00161
Policy_SD: 0.894
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 3.25e+06
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVar

a_f      |    0.01    0.11 |    0.28    1.78 |   -0.63   -3.13 |    0.63    3.10
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  186.69 |    9.07 |  166.96 |  212.78
tracking_error |  0.0126 |  0.0422 |  0.0000 |  1.1798
optflow_error |  0.0726 |  0.8664 |  0.0000 | 42.8145
pixel_icoords |    0.15   -0.34 |    6.81    6.85 |  -16.24  -16.97 |   15.24   17.83
theta_cv |    0.06 |    0.05 |    0.00 |    0.28
steps    |  193.18 |    7.58 |  180.00 |  217.00
***** Episode 17050, Mean R = 186.8  Std R = 10.7  Min R = 167.0
PolicyLoss: -0.00805
Policy_Beta: 0.1
Policy_Entropy: 0.262
Policy_KL: 0.00152
Policy_SD: 0.893
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 3.3e+06
VF_0_ExplainedVarNew: 0.979
VF_0_Expl

***** Episode 17360, Mean R = 186.8  Std R = 8.4  Min R = 170.2
PolicyLoss: -0.0123
Policy_Beta: 0.1
Policy_Entropy: 0.239
Policy_KL: 0.00189
Policy_SD: 0.896
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 3.36e+06
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.101


ADV1:  -0.001407649604749723 0.029583202959124293 0.16338290125540955 -0.6487540196800591
ADV2:  0.04220411593408523 0.6374425212807134 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4911   0.2615   1.1581   3.0822   1.4637   0.6445
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0062   0.0032   0.0129   1.9276   0.8688   0.6518
***** Episode 17391, Mean R = 187.0  Std R = 9.1  Min R = 171.9
PolicyLoss: -0.0411
Policy_Beta: 0.1
Policy_Entropy: 0.229
Policy_KL: 0.000822
Policy_SD: 0.883
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 3.37e+06
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.172


ADV1:  0.002525568389341981 0.03236951333679897 0.36037289779028914 

tracking_error |  0.0102 |  0.0363 |  0.0000 |  1.2356
optflow_error |  0.0644 |  0.8722 |  0.0000 | 46.4152
pixel_icoords |   -0.62    0.46 |    7.12    6.67 |  -17.41  -16.12 |   14.36   16.27
theta_cv |    0.05 |    0.05 |    0.00 |    0.26
steps    |  193.55 |    7.26 |  180.00 |  226.00
***** Episode 17670, Mean R = 187.9  Std R = 7.1  Min R = 173.5
PolicyLoss: -0.0374
Policy_Beta: 0.1
Policy_Entropy: 0.24
Policy_KL: 0.00102
Policy_SD: 0.897
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 3.42e+06
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.122


ADV1:  0.0015907459494928435 0.025899780478590685 0.17967046093756822 -0.37125195211297135
ADV2:  0.011019922650644685 0.7146446131699882 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6413   0.3411   1.6677   3.1749   1.4637   0.7026
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0080   0.0051   0.0207   1.9276   0.8688   0.6518
***** Episode 17701, Mean R = 186.0  Std R = 9.1  Min R = 169.8
P

optflow_error |  0.0618 |  0.7970 |  0.0000 | 44.4767
pixel_icoords |   -0.75    0.26 |    6.67    6.44 |  -16.26  -15.84 |   16.15   15.37
theta_cv |    0.06 |    0.05 |    0.00 |    0.29
steps    |  193.16 |    6.94 |  180.00 |  217.00
***** Episode 17980, Mean R = 186.4  Std R = 6.9  Min R = 172.1
PolicyLoss: -0.0268
Policy_Beta: 0.1
Policy_Entropy: 0.246
Policy_KL: 0.00157
Policy_SD: 0.896
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 3.48e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.126


Dynamics: Max Disturbance (m/s^2):  [1.58925643e-12 3.18466002e-13 2.13438271e-12] 2.680064574915074e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.47800953e-13 2.68899340e-12] 3.065518181626573e-12
ADV1:  -0.0004964128212367274 0.0269328433631589 0.21714584654858238 -0.5039337896737015
ADV2:  0.02750597077260619 0.6780326507239184 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6706   0.3169   1.6088   3.1749   1.4637   0.7026
ValFun  Gradien

***** Episode 18290, Mean R = 189.0  Std R = 6.5  Min R = 174.8
PolicyLoss: -0.0246
Policy_Beta: 0.1
Policy_Entropy: 0.24
Policy_KL: 0.00124
Policy_SD: 0.899
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 3.54e+06
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.0972


ADV1:  0.00016564509461571984 0.024326998644991854 0.1898879110813141 -0.4652587921304451
ADV2:  0.013269908524150558 0.7203051077793156 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5603   0.4494   2.1680   3.1749   1.4637   0.7026
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0009   0.0040   1.9276   0.8688   0.6518
***** Episode 18321, Mean R = 187.0  Std R = 8.4  Min R = 165.8
PolicyLoss: -0.0175
Policy_Beta: 0.1
Policy_Entropy: 0.253
Policy_KL: 0.00098
Policy_SD: 0.901
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 3.55e+06
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.121


ADV1:  -0.0018720028663784026 0.031028344223868912 0.269439488848625

***** Episode 18600, Mean R = 188.5  Std R = 9.3  Min R = 172.8
PolicyLoss: -0.0269
Policy_Beta: 0.1
Policy_Entropy: 0.235
Policy_KL: 0.00103
Policy_SD: 0.883
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 3.6e+06
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.191


ADV1:  0.003385559759416994 0.03163628821928796 0.4382322446752258 -0.4574364423751831
ADV2:  -0.04045590505995717 0.6405065716793019 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6531   0.3723   1.7599   3.1749   1.4637   0.7026
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0164   0.0092   0.0377   1.9276   0.8688   0.6518
***** Episode 18631, Mean R = 190.5  Std R = 9.2  Min R = 174.0
PolicyLoss: 0.00899
Policy_Beta: 0.1
Policy_Entropy: 0.251
Policy_KL: 0.00122
Policy_SD: 0.884
Policy_lr_mult: 1
Steps: 6.06e+03
TotalSteps: 3.61e+06
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.961
VF_0_Loss : 0.268


Dynamics: Max Disturbance (m/s^2):  [1.58925643e-12 3.18466002e-13 2.134

***** Episode 18910, Mean R = 187.1  Std R = 7.1  Min R = 171.5
PolicyLoss: -0.0489
Policy_Beta: 0.1
Policy_Entropy: 0.252
Policy_KL: 0.00171
Policy_SD: 0.898
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 3.66e+06
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.123


Dynamics: Max Disturbance (m/s^2):  [1.58925643e-12 3.18466002e-13 2.13438271e-12] 2.680064574915074e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.47800953e-13 2.68899340e-12] 3.065518181626573e-12
ADV1:  0.00032791503985645133 0.03213866925011204 0.31240931591046794 -0.556750235488351
ADV2:  0.007257859535936734 0.6560591669548977 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7980   0.5545   2.5814   3.4634   1.4637   0.7026
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0066   0.0035   0.0148   1.9276   0.8688   0.6518
***** Episode 18941, Mean R = 187.0  Std R = 9.6  Min R = 173.1
PolicyLoss: -0.0139
Policy_Beta: 0.1
Policy_Entropy: 0.242
Policy_KL: 0.00184
Policy_

theta_cv |    0.05 |    0.05 |    0.00 |    0.27
steps    |  192.38 |    7.01 |  179.00 |  214.00
***** Episode 19220, Mean R = 190.5  Std R = 6.0  Min R = 182.5
PolicyLoss: 0.00321
Policy_Beta: 0.1
Policy_Entropy: 0.244
Policy_KL: 0.00136
Policy_SD: 0.893
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 3.72e+06
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.161


ADV1:  -0.0017022557321210731 0.026110585195142103 0.18178159779491998 -0.452291878515947
ADV2:  0.028257193874221764 0.710774325091549 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6725   0.4404   2.2371   4.5523   1.4637   0.9208
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0045   0.0021   0.0091   1.9276   0.8688   0.6518
***** Episode 19251, Mean R = 189.7  Std R = 7.7  Min R = 175.5
PolicyLoss: -0.0212
Policy_Beta: 0.1
Policy_Entropy: 0.248
Policy_KL: 0.000791
Policy_SD: 0.893
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 3.72e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld:

theta_cv |    0.06 |    0.05 |    0.00 |    0.30
steps    |  192.59 |    7.05 |  180.00 |  219.00
***** Episode 19530, Mean R = 188.5  Std R = 8.5  Min R = 171.6
PolicyLoss: -0.00761
Policy_Beta: 0.1
Policy_Entropy: 0.25
Policy_KL: 0.00187
Policy_SD: 0.895
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 3.78e+06
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.172


ADV1:  -0.0009193828647580468 0.030253761312854656 0.3393043775716241 -0.4259348153084175
ADV2:  0.01482443961578319 0.7008315770133038 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5469   0.3317   1.2872   4.9816   1.4637   0.9810
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0008   0.0051   1.9276   0.8688   0.6518
***** Episode 19561, Mean R = 189.6  Std R = 8.0  Min R = 173.9
PolicyLoss: -0.0146
Policy_Beta: 0.1
Policy_Entropy: 0.246
Policy_KL: 0.00115
Policy_SD: 0.904
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 3.78e+06
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOl

optflow_error |  0.0460 |  0.7270 |  0.0000 | 39.6603
pixel_icoords |    0.27    0.54 |    6.30    6.64 |  -16.30  -15.95 |   15.29   17.80
theta_cv |    0.06 |    0.05 |    0.00 |    0.27
steps    |  193.27 |    7.18 |  180.00 |  225.00
***** Episode 19840, Mean R = 189.0  Std R = 9.6  Min R = 169.6
PolicyLoss: -0.00953
Policy_Beta: 0.1
Policy_Entropy: 0.251
Policy_KL: 0.00209
Policy_SD: 0.892
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 3.84e+06
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.185


Dynamics: Max Disturbance (m/s^2):  [1.58925643e-12 3.18466002e-13 2.13438271e-12] 2.680064574915074e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.47800953e-13 2.68899340e-12] 3.065518181626573e-12
ADV1:  0.0010186445713135219 0.02858134277452749 0.1780607344484506 -0.38741983420670606
ADV2:  0.017854182074341807 0.6814253620099955 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5001   0.2709   1.5221   5.6264   2.0954   0.9956
ValFun  Gradi

***** Episode 20150, Mean R = 190.0  Std R = 7.8  Min R = 176.4
PolicyLoss: -0.019
Policy_Beta: 0.1
Policy_Entropy: 0.246
Policy_KL: 0.0009
Policy_SD: 0.891
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 3.9e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.14


ADV1:  0.00037319395268483935 0.02830231827394253 0.19607553309910047 -0.3514770905213332
ADV2:  0.01191772980924604 0.7169815976955991 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1406   0.4966   2.2783   5.6264   2.0954   0.9956
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0036   0.0015   0.0065   1.9276   0.8688   0.6518
***** Episode 20181, Mean R = 190.4  Std R = 8.4  Min R = 177.5
PolicyLoss: -0.0192
Policy_Beta: 0.1
Policy_Entropy: 0.231
Policy_KL: 0.00175
Policy_SD: 0.884
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 3.9e+06
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.169


ADV1:  0.0005327475050113095 0.02539554820483296 0.2366437606969508 -0.257800871

pixel_icoords |    0.34    0.21 |    6.86    6.95 |  -17.29  -16.59 |   15.80   15.96
theta_cv |    0.06 |    0.05 |    0.00 |    0.26
steps    |  193.15 |    7.41 |  180.00 |  216.00
***** Episode 20460, Mean R = 190.5  Std R = 7.5  Min R = 173.6
PolicyLoss: -0.00173
Policy_Beta: 0.1
Policy_Entropy: 0.25
Policy_KL: 0.00178
Policy_SD: 0.891
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 3.96e+06
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.174


ADV1:  0.0001261007455816143 0.028911514055731967 0.2219258473760658 -0.2924136170161999
ADV2:  0.019825320751961955 0.7282017182501062 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0992   0.4426   2.5024   5.6264   2.0954   0.9956
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0007   0.0035   1.9276   0.8688   0.6518
***** Episode 20491, Mean R = 189.0  Std R = 5.8  Min R = 177.5
PolicyLoss: -0.0254
Policy_Beta: 0.1
Policy_Entropy: 0.251
Policy_KL: 0.00136
Policy_SD: 0.899
Policy_lr_mult: 

theta_cv |    0.05 |    0.05 |    0.00 |    0.25
steps    |  193.17 |    7.18 |  180.00 |  221.00
***** Episode 20770, Mean R = 189.3  Std R = 8.6  Min R = 177.2
PolicyLoss: -0.0344
Policy_Beta: 0.1
Policy_Entropy: 0.243
Policy_KL: 0.00151
Policy_SD: 0.894
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 4.02e+06
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.169


ADV1:  -0.0005737413288445381 0.03081386328976841 0.1770610387530271 -0.42387998579916
ADV2:  0.021964490229181333 0.707732340649119 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7714   0.4173   2.1414   5.6264   2.2102   1.2145
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0041   0.0015   0.0070   1.9276   0.8688   0.6518
***** Episode 20801, Mean R = 189.4  Std R = 6.5  Min R = 174.0
PolicyLoss: -0.0246
Policy_Beta: 0.1
Policy_Entropy: 0.245
Policy_KL: 0.00129
Policy_SD: 0.899
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 4.02e+06
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 

tracking_error |  0.0072 |  0.0244 |  0.0000 |  1.1816
optflow_error |  0.0395 |  0.6488 |  0.0000 | 50.1276
pixel_icoords |   -0.08   -0.29 |    6.72    6.85 |  -14.29  -17.05 |   16.97   17.94
theta_cv |    0.06 |    0.05 |    0.00 |    0.27
steps    |  193.27 |    6.76 |  179.00 |  220.00
***** Episode 21080, Mean R = 189.3  Std R = 8.2  Min R = 169.2
PolicyLoss: -0.0251
Policy_Beta: 0.1
Policy_Entropy: 0.259
Policy_KL: 0.00138
Policy_SD: 0.9
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 4.08e+06
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.166


ADV1:  0.0055226469970565606 0.028882313953247037 0.24949992943572763 -0.2954090090221863
ADV2:  -0.05201399702019046 0.7048244283694441 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4643   0.5909   2.8165   5.6264   2.2102   1.2145
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0170   0.0097   0.0362   1.9276   0.8688   0.6518
***** Episode 21111, Mean R = 190.8  Std R = 9.2  Min R = 176.0
Pol

pixel_icoords |    0.76    0.46 |    7.00    6.60 |  -15.17  -15.14 |   15.75   15.53
theta_cv |    0.05 |    0.05 |    0.00 |    0.26
steps    |  193.41 |    6.98 |  181.00 |  215.00
***** Episode 21390, Mean R = 191.2  Std R = 7.2  Min R = 174.6
PolicyLoss: 0.00947
Policy_Beta: 0.1
Policy_Entropy: 0.25
Policy_KL: 0.000851
Policy_SD: 0.895
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 4.14e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.202


ADV1:  -0.002891770523032092 0.03222267178644438 0.21355925789710684 -0.47221676642616833
ADV2:  0.041434548692920466 0.668656596706416 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1270   0.5933   2.9358   5.6264   2.2102   1.2145
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0067   0.0030   0.0129   1.9276   0.8688   0.6518
***** Episode 21421, Mean R = 190.3  Std R = 6.8  Min R = 172.9
PolicyLoss: -0.0336
Policy_Beta: 0.1
Policy_Entropy: 0.241
Policy_KL: 0.00145
Policy_SD: 0.902
Policy_lr_mult: 1

optflow_error |  0.0368 |  0.6355 |  0.0000 | 39.6236
pixel_icoords |    0.17    0.67 |    7.02    6.84 |  -17.91  -15.05 |   16.92   15.31
theta_cv |    0.06 |    0.05 |    0.00 |    0.28
steps    |  193.08 |    6.78 |  180.00 |  214.00
***** Episode 21700, Mean R = 188.3  Std R = 6.4  Min R = 172.4
PolicyLoss: -0.0199
Policy_Beta: 0.1
Policy_Entropy: 0.249
Policy_KL: 0.00123
Policy_SD: 0.9
Policy_lr_mult: 1
Steps: 5.92e+03
TotalSteps: 4.2e+06
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.104


ADV1:  0.005473840793472351 0.036699964388654804 0.5279910185552606 -0.20066032500111144
ADV2:  -0.07441904262183931 0.6165567690884676 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3483   0.8004   3.8532   6.6606   2.4451   1.2145
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0281   0.0166   0.0746   1.9276   0.8688   0.6518
***** Episode 21731, Mean R = 193.0  Std R = 9.4  Min R = 178.8
PolicyLoss: 0.0456
Policy_Beta: 0.1
Policy_Entropy: 0.241
Po

optflow_error |  0.0333 |  0.5787 |  0.0000 | 32.4183
pixel_icoords |   -0.03    0.41 |    6.87    6.39 |  -15.49  -15.26 |   17.94   14.54
theta_cv |    0.05 |    0.04 |    0.00 |    0.27
steps    |  193.11 |    7.02 |  180.00 |  218.00
***** Episode 22010, Mean R = 188.4  Std R = 7.3  Min R = 175.8
PolicyLoss: -0.0212
Policy_Beta: 0.1
Policy_Entropy: 0.238
Policy_KL: 0.00153
Policy_SD: 0.893
Policy_lr_mult: 1
Steps: 5.91e+03
TotalSteps: 4.26e+06
VF_0_ExplainedVarNew: 0.96
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.16


ADV1:  -0.01338212242986022 0.03172127573049256 0.3046987591980037 -0.34734121390710615
ADV2:  0.029295803862563856 0.7742498296819489 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0186   0.4637   2.6134   6.6606   2.4451   1.2145
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0245   0.0151   0.0608   1.9276   0.8688   0.6518
***** Episode 22041, Mean R = 190.3  Std R = 5.7  Min R = 175.1
PolicyLoss: -0.0024
Policy_Beta: 0.1
Policy_Entropy: 0.254
P

tracking_error |  0.0061 |  0.0211 |  0.0000 |  0.9815
optflow_error |  0.0359 |  0.6015 |  0.0000 | 41.9607
pixel_icoords |   -0.14    0.07 |    6.95    6.41 |  -16.17  -16.26 |   16.90   18.23
theta_cv |    0.05 |    0.05 |    0.00 |    0.28
steps    |  193.35 |    7.19 |  179.00 |  219.00
***** Episode 22320, Mean R = 190.8  Std R = 8.1  Min R = 174.5
PolicyLoss: -0.0352
Policy_Beta: 0.1
Policy_Entropy: 0.243
Policy_KL: 0.00123
Policy_SD: 0.888
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 4.32e+06
VF_0_ExplainedVarNew: 0.952
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.165


Dynamics: Max Disturbance (m/s^2):  [1.58925643e-12 3.18466002e-13 2.13438271e-12] 2.680064574915074e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.48772718e-13 2.68899340e-12] 3.0655968871791573e-12
ADV1:  0.004079729331526344 0.02639723966416051 0.3045184101275147 -0.2871434256867169
ADV2:  -0.044476928248570885 0.7072106904524921 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6542   0.358

tracking_error |  0.0063 |  0.0240 |  0.0000 |  1.2245
optflow_error |  0.0407 |  0.7318 |  0.0000 | 55.0747
pixel_icoords |   -0.10   -0.18 |    6.70    6.75 |  -15.21  -16.41 |   17.25   15.03
theta_cv |    0.06 |    0.05 |    0.00 |    0.27
steps    |  193.38 |    6.93 |  179.00 |  214.00
***** Episode 22630, Mean R = 189.8  Std R = 5.7  Min R = 177.4
PolicyLoss: -0.0343
Policy_Beta: 0.1
Policy_Entropy: 0.245
Policy_KL: 0.00145
Policy_SD: 0.889
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 4.38e+06
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.0957


ADV1:  -0.0002751320476163269 0.026857422573095643 0.224953286285 -0.31740370744564794
ADV2:  0.009620059793366926 0.7223542333677532 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6336   0.3632   1.5234   6.6606   2.4451   1.2597
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0012   0.0056   1.9276   0.8688   0.6518
***** Episode 22661, Mean R = 190.8  Std R = 6.4  Min R = 178.7
Pol

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |   -0.02    0.14 |    0.26    1.75 |   -0.60   -3.13 |    0.59    3.13
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.03 |    0.57 |    0.00 |   10.00
tracking_rewards |  190.45 |    7.18 |  170.27 |  212.80
tracking_error |  0.0057 |  0.0191 |  0.0000 |  1.0816
optflow_error |  0.0333 |  0.5583 |  0.0000 | 42.6328
pixel_icoords |   -0.45    0.33 |    6.49    6.91 |  -15.13  -17.92 |   16.14   16.07
theta_cv |    0.06 |    0.05 |    0.00 |    0.27
steps    |  193.07 |    7.02 |  179.00 |  220.00
***** Episode 22940, Mean R = 192.4  Std R = 8.5  Min R = 178.3
PolicyLoss: -0.0306
Policy_Beta: 0.1
Policy_Entropy: 0.257
Policy_KL: 0.000

theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.40 |    7.12 |  179.00 |  213.00
***** Episode 23250, Mean R = 193.5  Std R = 8.1  Min R = 181.8
PolicyLoss: -0.0118
Policy_Beta: 0.1
Policy_Entropy: 0.239
Policy_KL: 0.00175
Policy_SD: 0.874
Policy_lr_mult: 1
Steps: 6.06e+03
TotalSteps: 4.5e+06
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.28


Dynamics: Max Disturbance (m/s^2):  [1.59017843e-12 3.18466002e-13 2.13438271e-12] 2.68061141774913e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.48772718e-13 2.68899340e-12] 3.0655968871791573e-12
ADV1:  0.0018301361724600384 0.029540749858984194 0.28849806861184046 -0.33150329103320963
ADV2:  -0.0007961690252730137 0.6895449979653487 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7463   0.3852   1.4803   6.6606   2.4451   1.3309
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0057   0.0030   0.0111   1.9276   0.8688   0.6518
***** Episode 23281, Mean R = 191.8  Std R = 7.2 

optflow_error |  0.0381 |  0.7099 |  0.0000 | 47.6373
pixel_icoords |   -0.10   -0.34 |    6.90    6.67 |  -16.48  -17.54 |   14.81   17.08
theta_cv |    0.05 |    0.05 |    0.00 |    0.27
steps    |  193.15 |    6.67 |  180.00 |  212.00
***** Episode 23560, Mean R = 193.6  Std R = 7.2  Min R = 175.8
PolicyLoss: -0.00474
Policy_Beta: 0.1
Policy_Entropy: 0.245
Policy_KL: 0.0016
Policy_SD: 0.888
Policy_lr_mult: 1
Steps: 6.08e+03
TotalSteps: 4.56e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.155


ADV1:  -0.0009879666515397703 0.027951464733436546 0.20738400060414708 -0.2841304949673134
ADV2:  0.026030759189044544 0.6880708336093975 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0082   0.5964   3.0234   6.6606   2.4451   1.3309
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0069   0.0038   0.0176   1.9276   0.8688   0.6518
***** Episode 23591, Mean R = 191.7  Std R = 7.1  Min R = 179.1
PolicyLoss: -0.0245
Policy_Beta: 0.1
Policy_Entropy: 0.

optflow_error |  0.0294 |  0.5799 |  0.0000 | 55.8755
pixel_icoords |    0.04   -0.23 |    6.67    6.63 |  -16.23  -15.16 |   17.10   15.77
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.16 |    7.65 |  179.00 |  220.00
***** Episode 23870, Mean R = 191.6  Std R = 9.0  Min R = 178.6
PolicyLoss: -0.00745
Policy_Beta: 0.1
Policy_Entropy: 0.232
Policy_KL: 0.00168
Policy_SD: 0.879
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 4.62e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.236


ADV1:  0.0025923643017662385 0.02611804207242889 0.24291068170437358 -0.2665165056962246
ADV2:  -0.011830466919062484 0.7218219921568104 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9691   1.0358   4.5025   6.6606   2.4451   1.3309
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0057   0.0028   0.0100   1.9276   0.8688   0.6518
***** Episode 23901, Mean R = 191.0  Std R = 7.2  Min R = 177.1
PolicyLoss: -0.00309
Policy_Beta: 0.1
Policy_Entropy: 0

tracking_error |  0.0047 |  0.0191 |  0.0000 |  1.1173
optflow_error |  0.0331 |  0.6462 |  0.0000 | 50.3828
pixel_icoords |   -0.35    0.33 |    6.96    6.86 |  -15.39  -14.29 |   16.92   15.22
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  193.06 |    7.10 |  179.00 |  223.00
***** Episode 24180, Mean R = 190.0  Std R = 7.6  Min R = 177.5
PolicyLoss: -0.0142
Policy_Beta: 0.1
Policy_Entropy: 0.234
Policy_KL: 0.00119
Policy_SD: 0.878
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 4.68e+06
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.146


Dynamics: Max Disturbance (m/s^2):  [1.59017843e-12 3.18466002e-13 2.13438271e-12] 2.68061141774913e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.50880795e-13 2.68899340e-12] 3.065768677351602e-12
ADV1:  0.0017032817601322053 0.024515783542735474 0.328949602103747 -0.343843013048172
ADV2:  -0.018574349474843377 0.6390812537011097 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2128   0.73

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.01    0.12 |    0.28    1.85 |   -0.59   -3.12 |    0.62    3.13
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.03 |    0.57 |    0.00 |   10.00
tracking_rewards |  191.61 |    7.70 |  169.18 |  214.37
tracking_error |  0.0045 |  0.0206 |  0.0000 |  1.0447
optflow_error |  0.0359 |  0.7347 |  0.0000 | 47.7571
pixel_icoords |    0.31   -0.37 |    7.16    6.61 |  -15.77  -17.59 |   17.17   14.54
theta_cv |    0.05 |    0.04 |    0.00 |    0.26
steps    |  193.75 |    7.35 |  179.00 |  216.00
***** Episode 24490, Mean R = 190.7  Std R = 6.5  Min R = 177.8
PolicyLoss: -0.0352
Policy_Beta: 0.1
Policy_Entropy: 0.216
Policy_KL: 0.001

optflow_error |  0.0340 |  0.6863 |  0.0000 | 40.4000
pixel_icoords |   -0.29    0.39 |    6.45    6.18 |  -16.84  -16.56 |   14.93   16.87
theta_cv |    0.05 |    0.04 |    0.00 |    0.26
steps    |  192.35 |    6.62 |  179.00 |  212.00
***** Episode 24800, Mean R = 191.0  Std R = 6.9  Min R = 178.8
PolicyLoss: -0.0151
Policy_Beta: 0.1
Policy_Entropy: 0.23
Policy_KL: 0.00093
Policy_SD: 0.878
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 4.8e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.196


ADV1:  0.0012649641171732027 0.02204695280254496 0.2097388119460598 -0.21901127579380036
ADV2:  -0.012893053154306427 0.6778388819484044 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6303   0.3829   1.6772   6.6606   2.6380   1.4653
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0070   0.0043   0.0164   1.9276   0.8688   0.6518
***** Episode 24831, Mean R = 192.8  Std R = 8.2  Min R = 175.0
PolicyLoss: -0.00401
Policy_Beta: 0.1
Policy_Entropy: 0.21

optflow_error |  0.0275 |  0.5558 |  0.0000 | 43.1305
pixel_icoords |    0.11    0.47 |    6.79    6.74 |  -15.50  -14.29 |   16.90   15.27
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.20 |    6.86 |  179.00 |  212.00
***** Episode 25110, Mean R = 191.8  Std R = 6.4  Min R = 180.2
PolicyLoss: 0.0148
Policy_Beta: 0.1
Policy_Entropy: 0.221
Policy_KL: 0.0012
Policy_SD: 0.877
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 4.86e+06
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.167


ADV1:  0.0027690356079285343 0.024786303219932616 0.23219875168835125 -0.21562248961189412
ADV2:  -0.014799936540466586 0.7345133383681572 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1607   1.0351   5.1063   7.7920   2.6428   1.7781
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0058   0.0035   0.0156   1.9276   0.8688   0.6518
***** Episode 25141, Mean R = 192.7  Std R = 5.7  Min R = 179.6
PolicyLoss: -0.00786
Policy_Beta: 0.1
Policy_Entropy: 0.217

tracking_error |  0.0046 |  0.0209 |  0.0000 |  1.0941
optflow_error |  0.0394 |  0.7965 |  0.0000 | 48.9740
pixel_icoords |    0.22   -0.22 |    6.62    6.94 |  -16.32  -16.63 |   16.54   15.53
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  193.47 |    7.09 |  180.00 |  216.00
***** Episode 25420, Mean R = 190.2  Std R = 6.8  Min R = 178.0
PolicyLoss: 0.00828
Policy_Beta: 0.1
Policy_Entropy: 0.237
Policy_KL: 0.000948
Policy_SD: 0.874
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 4.92e+06
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.147


ADV1:  8.694682266741136e-05 0.022087224495134762 0.15980813266627913 -0.3270017481851115
ADV2:  0.017481613204967102 0.7240569804114726 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3259   1.0186   5.3783   7.7920   2.6428   1.7781
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0017   0.0070   1.9276   0.8688   0.6518
***** Episode 25451, Mean R = 189.8  Std R = 6.0  Min R = 177.9


tracking_error |  0.0045 |  0.0169 |  0.0000 |  1.1199
optflow_error |  0.0296 |  0.5987 |  0.0000 | 50.9717
pixel_icoords |    0.46   -0.53 |    6.47    6.73 |  -13.48  -15.21 |   16.13   16.62
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  192.62 |    6.35 |  180.00 |  215.00
***** Episode 25730, Mean R = 191.4  Std R = 5.9  Min R = 180.8
PolicyLoss: -0.0171
Policy_Beta: 0.1
Policy_Entropy: 0.237
Policy_KL: 0.00118
Policy_SD: 0.872
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 4.98e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.123


ADV1:  0.0016169517193712903 0.024083358297580373 0.27729779683263206 -0.24607238804154918
ADV2:  -0.010505420781089823 0.720775743699487 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0895   0.4246   2.1852   7.7920   2.6428   1.7781
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0053   0.0027   0.0106   1.9276   0.8688   0.6518
***** Episode 25761, Mean R = 191.3  Std R = 6.2  Min R = 180.5


w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.00    0.02 |    0.27    1.76 |   -0.58   -3.13 |    0.62    3.13
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  191.02 |    7.11 |  170.16 |  217.62
tracking_error |  0.0045 |  0.0179 |  0.0000 |  1.1862
optflow_error |  0.0318 |  0.6501 |  0.0000 | 49.8031
pixel_icoords |    0.15    0.17 |    6.79    6.58 |  -14.53  -14.50 |   17.69   16.13
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.12 |    6.89 |  180.00 |  220.00
***** Episode 26040, Mean R = 190.7  Std R = 8.1  Min R = 176.8
PolicyLoss: -0.0276
Policy_Beta: 0.1
Policy_Entropy: 0.23
Policy_KL: 0.0017

tracking_error |  0.0046 |  0.0183 |  0.0000 |  0.9384
optflow_error |  0.0316 |  0.6288 |  0.0000 | 41.2502
pixel_icoords |   -0.18   -0.28 |    6.58    6.48 |  -15.18  -15.52 |   16.06   17.04
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.43 |    7.38 |  180.00 |  218.00
***** Episode 26350, Mean R = 192.1  Std R = 7.2  Min R = 176.3
PolicyLoss: 0.00368
Policy_Beta: 0.1
Policy_Entropy: 0.225
Policy_KL: 0.000938
Policy_SD: 0.867
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 5.1e+06
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.207


ADV1:  -0.0024556351565579023 0.025136067473767783 0.182678029858505 -0.23226450570738233
ADV2:  0.030499953717610856 0.737715542627966 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.7058   1.2362   6.3042   8.3651   2.7058   1.7781
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0072   0.0031   0.0123   1.9276   0.8688   0.6518
***** Episode 26381, Mean R = 190.0  Std R = 6.2  Min R = 170.5
Pol

tracking_error |  0.0044 |  0.0172 |  0.0000 |  1.0486
optflow_error |  0.0310 |  0.6517 |  0.0000 | 47.9660
pixel_icoords |   -0.74   -0.50 |    6.74    6.85 |  -16.90  -17.32 |   14.22   15.00
theta_cv |    0.05 |    0.04 |    0.00 |    0.27
steps    |  193.60 |    6.78 |  179.00 |  224.00
***** Episode 26660, Mean R = 191.0  Std R = 8.0  Min R = 174.4
PolicyLoss: 0.0236
Policy_Beta: 0.1
Policy_Entropy: 0.258
Policy_KL: 0.0014
Policy_SD: 0.891
Policy_lr_mult: 1
Steps: 6.00e+03
TotalSteps: 5.16e+06
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.187


Dynamics: Max Disturbance (m/s^2):  [1.59017843e-12 3.18466002e-13 2.13438271e-12] 2.68061141774913e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.50880795e-13 2.68899340e-12] 3.065768677351602e-12
ADV1:  0.0006703227097418547 0.022070053807998133 0.2121710461238162 -0.28963251271237733
ADV2:  0.004547452553234939 0.6957141995879542 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9681   0.432

tracking_error |  0.0050 |  0.0183 |  0.0000 |  0.9693
optflow_error |  0.0323 |  0.6014 |  0.0000 | 30.7265
pixel_icoords |    0.49    0.10 |    6.48    6.36 |  -14.05  -16.10 |   13.83   16.82
theta_cv |    0.05 |    0.04 |    0.00 |    0.27
steps    |  193.05 |    6.27 |  179.00 |  212.00
***** Episode 26970, Mean R = 190.4  Std R = 6.4  Min R = 174.9
PolicyLoss: -0.0207
Policy_Beta: 0.1
Policy_Entropy: 0.24
Policy_KL: 0.00134
Policy_SD: 0.882
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 5.22e+06
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.124


ADV1:  -0.0012723528127162667 0.01950056752059183 0.1427752275920724 -0.1680080089935878
ADV2:  0.029470271572651634 0.7128641656187746 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8551   0.5054   2.1633   8.3651   2.7058   1.7781
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0010   0.0048   1.9276   0.8688   0.6518
***** Episode 27001, Mean R = 191.2  Std R = 7.0  Min R = 178.6
Poli

a_f      |   -0.00    0.10 |    0.27    1.74 |   -0.61   -3.11 |    0.67    3.14
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.03 |    0.57 |    0.00 |   10.00
tracking_rewards |  191.36 |    7.14 |  173.95 |  215.42
tracking_error |  0.0045 |  0.0165 |  0.0000 |  1.2535
optflow_error |  0.0277 |  0.5558 |  0.0000 | 56.0065
pixel_icoords |   -0.15    0.00 |    6.73    6.37 |  -15.62  -16.02 |   18.24   17.64
theta_cv |    0.05 |    0.04 |    0.00 |    0.26
steps    |  193.41 |    6.99 |  180.00 |  221.00
***** Episode 27280, Mean R = 189.3  Std R = 6.2  Min R = 174.0
PolicyLoss: -0.0379
Policy_Beta: 0.1
Policy_Entropy: 0.222
Policy_KL: 0.00122
Policy_SD: 0.875
Policy_lr_mult: 1
Steps: 5.93e+03
TotalSteps: 5.28e+06
VF_0_ExplainedVarNew: 0.977
VF_0_Expla

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |   -0.01    0.12 |    0.28    1.77 |   -0.64   -3.12 |    0.67    3.09
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  191.38 |    7.44 |  170.56 |  216.44
tracking_error |  0.0043 |  0.0159 |  0.0000 |  1.0842
optflow_error |  0.0268 |  0.5885 |  0.0000 | 48.4202
pixel_icoords |   -0.21    0.13 |    7.03    6.83 |  -16.49  -15.36 |   17.13   15.17
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.34 |    7.31 |  179.00 |  218.00
***** Episode 27590, Mean R = 190.9  Std R = 8.7  Min R = 179.6
PolicyLoss: -0.00357
Policy_Beta: 0.1
Policy_Entropy: 0.224
Policy_KL: 0.00

tracking_error |  0.0043 |  0.0152 |  0.0000 |  1.2608
optflow_error |  0.0264 |  0.5060 |  0.0000 | 42.0484
pixel_icoords |   -1.06    0.13 |    6.87    6.75 |  -16.34  -15.93 |   14.53   17.14
theta_cv |    0.05 |    0.04 |    0.00 |    0.27
steps    |  193.90 |    7.26 |  180.00 |  217.00
***** Episode 27900, Mean R = 190.7  Std R = 6.8  Min R = 180.2
PolicyLoss: -0.0248
Policy_Beta: 0.1
Policy_Entropy: 0.229
Policy_KL: 0.00239
Policy_SD: 0.88
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 5.4e+06
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.197


ADV1:  0.00292483386943248 0.02513589516812665 0.17078695405445699 -0.32824571352550114
ADV2:  -0.013060622914641955 0.7285039879423222 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.2364   1.6531   6.8693  10.0844   3.9862   2.1500
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0124   0.0057   0.0272   1.9276   0.8688   0.6518
***** Episode 27931, Mean R = 191.8  Std R = 7.6  Min R = 174.1
Polic

a_f      |   -0.01    0.00 |    0.26    1.73 |   -0.55   -3.11 |    0.65    3.13
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.03 |    0.57 |    0.00 |   10.00
tracking_rewards |  191.26 |    7.44 |  172.16 |  211.58
tracking_error |  0.0045 |  0.0160 |  0.0000 |  0.9052
optflow_error |  0.0288 |  0.5608 |  0.0000 | 35.7174
pixel_icoords |   -0.19   -0.12 |    6.46    6.82 |  -15.41  -15.87 |   16.93   17.37
theta_cv |    0.05 |    0.04 |    0.00 |    0.26
steps    |  193.34 |    7.16 |  180.00 |  213.00
***** Episode 28210, Mean R = 191.4  Std R = 8.3  Min R = 173.7
PolicyLoss: -0.019
Policy_Beta: 0.1
Policy_Entropy: 0.243
Policy_KL: 0.00118
Policy_SD: 0.885
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 5.46e+06
VF_0_ExplainedVarNew: 0.981
VF_0_Explai

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.01   -0.06 |    0.26    1.71 |   -0.56   -3.13 |    0.59    3.09
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.03 |    0.57 |    0.00 |   10.00
tracking_rewards |  191.15 |    7.16 |  177.13 |  211.63
tracking_error |  0.0043 |  0.0160 |  0.0000 |  1.0404
optflow_error |  0.0270 |  0.5461 |  0.0000 | 37.0378
pixel_icoords |    0.04    0.55 |    6.56    6.64 |  -14.66  -14.55 |   15.32   14.75
theta_cv |    0.05 |    0.04 |    0.00 |    0.28
steps    |  193.12 |    7.12 |  180.00 |  214.00
***** Episode 28520, Mean R = 191.8  Std R = 5.6  Min R = 181.4
PolicyLoss: -0.0291
Policy_Beta: 0.1
Policy_Entropy: 0.246
Policy_KL: 0.001

optflow_error |  0.0266 |  0.5439 |  0.0000 | 40.6163
pixel_icoords |    0.11    0.53 |    6.95    6.67 |  -16.13  -17.50 |   16.60   15.92
theta_cv |    0.06 |    0.04 |    0.00 |    0.27
steps    |  193.80 |    7.31 |  180.00 |  222.00
***** Episode 28830, Mean R = 190.4  Std R = 7.1  Min R = 180.7
PolicyLoss: 0.00995
Policy_Beta: 0.1
Policy_Entropy: 0.257
Policy_KL: 0.00144
Policy_SD: 0.895
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 5.58e+06
VF_0_ExplainedVarNew: 0.948
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.171


ADV1:  -0.0015873799162152765 0.02558640210893505 0.19098232586809527 -0.4346894294408642
ADV2:  0.022002034141780372 0.6548993655193975 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2893   0.9974   4.7100  10.3496   4.2073   3.2608
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0049   0.0025   0.0109   1.9276   0.8688   0.6518
***** Episode 28861, Mean R = 191.7  Std R = 7.2  Min R = 179.8
PolicyLoss: -0.0127
Policy_Beta: 0.1
Policy_Entropy: 0.2

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |   -0.01    0.02 |    0.28    1.79 |   -0.65   -3.09 |    0.64    3.12
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  191.61 |    7.16 |  169.62 |  218.52
tracking_error |  0.0043 |  0.0161 |  0.0000 |  1.0306
optflow_error |  0.0284 |  0.6052 |  0.0000 | 44.7505
pixel_icoords |   -0.22    0.36 |    7.02    6.84 |  -16.05  -14.88 |   16.20   15.63
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  193.59 |    6.92 |  180.00 |  221.00
***** Episode 29140, Mean R = 189.6  Std R = 9.2  Min R = 169.6
PolicyLoss: -0.0198
Policy_Beta: 0.1
Policy_Entropy: 0.259
Policy_KL: 0.000

a_f      |    0.00    0.06 |    0.27    1.78 |   -0.63   -3.13 |    0.64    3.10
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  190.99 |    6.86 |  170.88 |  220.68
tracking_error |  0.0044 |  0.0172 |  0.0000 |  1.0597
optflow_error |  0.0291 |  0.6157 |  0.0000 | 47.4512
pixel_icoords |    0.01   -0.03 |    6.62    6.94 |  -16.01  -14.82 |   16.93   16.51
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  192.98 |    6.75 |  180.00 |  222.00
***** Episode 29450, Mean R = 192.9  Std R = 7.7  Min R = 180.0
PolicyLoss: 0.0131
Policy_Beta: 0.1
Policy_Entropy: 0.255
Policy_KL: 0.00108
Policy_SD: 0.899
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 5.7e+06
VF_0_ExplainedVarNew: 0.965
VF_0_Explain

tracking_error |  0.0044 |  0.0176 |  0.0000 |  1.1518
optflow_error |  0.0309 |  0.6675 |  0.0000 | 49.7589
pixel_icoords |   -0.04    0.03 |    6.68    7.07 |  -16.25  -15.69 |   15.47   16.49
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  193.91 |    6.84 |  179.00 |  215.00
***** Episode 29760, Mean R = 191.0  Std R = 7.5  Min R = 172.5
PolicyLoss: -0.0159
Policy_Beta: 0.1
Policy_Entropy: 0.26
Policy_KL: 0.00187
Policy_SD: 0.901
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 5.76e+06
VF_0_ExplainedVarNew: 0.957
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.144


ADV1:  0.008177145021630458 0.02660048503102595 0.229457621384906 -0.2694264370161457
ADV2:  -0.012978787105454243 0.7024979098600942 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1743   0.6442   2.6238  18.2253   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0169   0.0097   0.0359   1.9276   0.8688   0.6518
***** Episode 29791, Mean R = 191.3  Std R = 6.3  Min R = 180.7
Polic

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.00    0.07 |    0.27    1.87 |   -0.66   -3.14 |    0.60    3.13
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  190.57 |    6.79 |  173.30 |  211.43
tracking_error |  0.0047 |  0.0170 |  0.0000 |  1.2299
optflow_error |  0.0281 |  0.5973 |  0.0000 | 53.9251
pixel_icoords |    0.14   -0.10 |    6.66    6.47 |  -17.29  -16.60 |   15.71   16.57
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  192.70 |    6.71 |  179.00 |  213.00
***** Episode 30070, Mean R = 190.3  Std R = 6.6  Min R = 178.4
PolicyLoss: -0.00713
Policy_Beta: 0.1
Policy_Entropy: 0.262
Policy_KL: 0.00

pixel_icoords |   -0.06    0.29 |    6.89    6.75 |  -16.03  -15.84 |   16.31   16.76
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.42 |    7.22 |  179.00 |  213.00
***** Episode 30380, Mean R = 192.3  Std R = 7.0  Min R = 180.9
PolicyLoss: -0.019
Policy_Beta: 0.1
Policy_Entropy: 0.259
Policy_KL: 0.00115
Policy_SD: 0.893
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 5.88e+06
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.161


ADV1:  0.0008036142115145176 0.023055926876061283 0.22477105300880085 -0.24238996259630535
ADV2:  0.00276881474529952 0.6876419804082006 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3113   0.6982   3.3519  18.2253   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0006   0.0031   1.9276   0.8688   0.6518
***** Episode 30411, Mean R = 191.9  Std R = 8.4  Min R = 173.4
PolicyLoss: -0.0123
Policy_Beta: 0.1
Policy_Entropy: 0.267
Policy_KL: 0.00159
Policy_SD: 0.895
Policy_lr_mult: 1

optflow_error |  0.0287 |  0.5614 |  0.0000 | 39.9327
pixel_icoords |    0.08    0.14 |    6.54    6.96 |  -16.77  -16.12 |   13.97   16.77
theta_cv |    0.05 |    0.04 |    0.00 |    0.26
steps    |  193.29 |    7.37 |  180.00 |  217.00
***** Episode 30690, Mean R = 191.0  Std R = 7.0  Min R = 177.9
PolicyLoss: 0.00314
Policy_Beta: 0.1
Policy_Entropy: 0.255
Policy_KL: 0.00129
Policy_SD: 0.89
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 5.94e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.148


ADV1:  -0.0005364829025311146 0.020291866047176237 0.17363018645973305 -0.28875902546468446
ADV2:  0.015853674028695475 0.6758561274657194 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6905   0.8423   3.3118  18.2253   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0005   0.0026   1.9276   0.8688   0.6518
***** Episode 30721, Mean R = 191.2  Std R = 7.8  Min R = 178.6
PolicyLoss: -0.015
Policy_Beta: 0.1
Policy_Entropy: 0.2

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.03   -0.01 |    0.27    1.72 |   -0.69   -3.07 |    0.63    3.12
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  191.56 |    6.88 |  178.64 |  213.69
tracking_error |  0.0043 |  0.0142 |  0.0000 |  0.9313
optflow_error |  0.0279 |  0.5395 |  0.0000 | 40.8799
pixel_icoords |    0.53   -0.03 |    6.80    7.13 |  -17.17  -16.01 |   16.91   15.69
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.53 |    6.84 |  180.00 |  216.00
***** Episode 31000, Mean R = 193.5  Std R = 7.5  Min R = 179.4
PolicyLoss: -0.00742
Policy_Beta: 0.1
Policy_Entropy: 0.253
Policy_KL: 0.00

optflow_error |  0.0282 |  0.5607 |  0.0000 | 39.1321
pixel_icoords |    0.21   -0.24 |    6.58    6.81 |  -16.62  -15.01 |   15.11   15.21
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  193.38 |    6.79 |  180.00 |  214.00
***** Episode 31310, Mean R = 190.9  Std R = 7.0  Min R = 178.5
PolicyLoss: -0.0303
Policy_Beta: 0.1
Policy_Entropy: 0.252
Policy_KL: 0.000495
Policy_SD: 0.897
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 6.06e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.145


ADV1:  -0.0006332969502157709 0.02315285046780067 0.22297582885719625 -0.3188215899463701
ADV2:  0.011547738542415276 0.6690983673246278 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4636   0.7303   3.1321  18.2253   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0011   0.0048   1.9276   0.8688   0.6518
***** Episode 31341, Mean R = 189.9  Std R = 6.9  Min R = 178.7
PolicyLoss: -0.0103
Policy_Beta: 0.1
Policy_Entropy: 0.

tracking_error |  0.0045 |  0.0150 |  0.0000 |  1.0378
optflow_error |  0.0271 |  0.5663 |  0.0000 | 42.2477
pixel_icoords |    0.03    0.12 |    7.04    7.09 |  -16.83  -15.02 |   14.67   17.76
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  193.50 |    7.34 |  181.00 |  224.00
***** Episode 31620, Mean R = 192.0  Std R = 6.3  Min R = 179.3
PolicyLoss: -0.0249
Policy_Beta: 0.1
Policy_Entropy: 0.253
Policy_KL: 0.00128
Policy_SD: 0.895
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 6.12e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.191


ADV1:  -0.0009002694872935505 0.028473449576357068 0.3271587107556313 -0.22639206180374594
ADV2:  -0.007235972699847654 0.6491513738934693 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1296   0.5598   3.1509  18.2253   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0050   0.0020   0.0113   1.9276   0.8688   0.6518
***** Episode 31651, Mean R = 190.9  Std R = 6.1  Min R = 176.0

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |   -0.03    0.07 |    0.27    1.79 |   -0.60   -3.14 |    0.64    3.14
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.03 |    0.57 |    0.00 |   10.00
tracking_rewards |  191.32 |    7.10 |  176.00 |  217.39
tracking_error |  0.0041 |  0.0155 |  0.0000 |  0.9574
optflow_error |  0.0281 |  0.6331 |  0.0000 | 43.4074
pixel_icoords |   -0.68    0.41 |    6.84    6.41 |  -16.88  -15.31 |   15.85   16.83
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  193.15 |    7.00 |  180.00 |  219.00
***** Episode 31930, Mean R = 188.1  Std R = 6.1  Min R = 179.3
PolicyLoss: -0.0287
Policy_Beta: 0.1
Policy_Entropy: 0.242
Policy_KL: 0.001

tracking_error |  0.0039 |  0.0142 |  0.0000 |  1.0986
optflow_error |  0.0247 |  0.5746 |  0.0000 | 51.0302
pixel_icoords |    0.26   -0.60 |    6.52    7.06 |  -15.32  -18.37 |   15.74   17.17
theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  193.31 |    6.62 |  179.00 |  215.00
***** Episode 32240, Mean R = 192.0  Std R = 5.4  Min R = 180.6
PolicyLoss: -0.0131
Policy_Beta: 0.1
Policy_Entropy: 0.237
Policy_KL: 0.00138
Policy_SD: 0.893
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 6.24e+06
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.116


ADV1:  -0.0019789283126603143 0.024576297614918367 0.2464838645090297 -0.2800116986069707
ADV2:  0.01623519879876052 0.6691472527978591 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7554   0.3964   1.6604  18.2253   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0046   0.0024   0.0110   1.9276   0.8688   0.6518
***** Episode 32271, Mean R = 191.5  Std R = 7.2  Min R = 178.4
Pol

optflow_error |  0.0232 |  0.4841 |  0.0000 | 44.5975
pixel_icoords |    0.16   -0.24 |    7.15    6.35 |  -18.29  -17.13 |   17.47   16.06
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  192.78 |    6.58 |  180.00 |  210.00
***** Episode 32550, Mean R = 193.4  Std R = 7.0  Min R = 182.7
PolicyLoss: -0.0165
Policy_Beta: 0.1
Policy_Entropy: 0.235
Policy_KL: 0.00125
Policy_SD: 0.879
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 6.3e+06
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.224


ADV1:  0.002486725795939448 0.022848197498863618 0.20868457853794098 -0.26603659969745475
ADV2:  -0.022534642117243264 0.7392378713755534 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.0377   4.9521  18.8500  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0075   0.0031   0.0129   1.9276   0.8688   0.6518
***** Episode 32581, Mean R = 192.6  Std R = 6.9  Min R = 179.4
PolicyLoss: 0.00224
Policy_Beta: 0.1
Policy_Entropy: 0.22

optflow_error |  0.0252 |  0.5638 |  0.0000 | 44.4191
pixel_icoords |    0.08   -0.58 |    6.54    6.67 |  -14.90  -15.40 |   14.76   15.29
theta_cv |    0.05 |    0.04 |    0.00 |    0.27
steps    |  192.63 |    6.71 |  179.00 |  214.00
***** Episode 32860, Mean R = 191.0  Std R = 6.8  Min R = 178.7
PolicyLoss: -0.0176
Policy_Beta: 0.1
Policy_Entropy: 0.231
Policy_KL: 0.000724
Policy_SD: 0.879
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 6.35e+06
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.122


Dynamics: Max Disturbance (m/s^2):  [1.59098505e-12 3.18466002e-13 2.13438271e-12] 2.681089994997043e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.50880795e-13 2.68899340e-12] 3.065768677351602e-12
ADV1:  0.0021208526224341974 0.023025381841302588 0.23814873785992474 -0.1811615901767094
ADV2:  -0.040327251001850695 0.6690521053980082 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6183   0.5697   2.7851  18.8500   9.1675   5.2382
ValFun  Gra

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.00   -0.08 |    0.26    1.73 |   -0.62   -3.02 |    0.60    3.14
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.03 |    0.57 |    0.00 |   10.00
tracking_rewards |  191.32 |    7.50 |  171.98 |  217.82
tracking_error |  0.0039 |  0.0140 |  0.0000 |  1.0680
optflow_error |  0.0247 |  0.5174 |  0.0000 | 44.5114
pixel_icoords |    0.19    0.24 |    6.50    6.92 |  -16.95  -15.06 |   16.64   16.62
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  193.14 |    7.44 |  179.00 |  219.00
***** Episode 33170, Mean R = 192.8  Std R = 5.8  Min R = 182.6
PolicyLoss: 0.0138
Policy_Beta: 0.1
Policy_Entropy: 0.234
Policy_KL: 0.0012

optflow_error |  0.0228 |  0.4879 |  0.0000 | 36.7212
pixel_icoords |   -0.04    0.29 |    6.85    7.05 |  -17.26  -16.98 |   16.60   15.20
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  193.44 |    7.76 |  179.00 |  219.00
***** Episode 33480, Mean R = 191.1  Std R = 7.8  Min R = 173.5
PolicyLoss: -0.0253
Policy_Beta: 0.1
Policy_Entropy: 0.247
Policy_KL: 0.00116
Policy_SD: 0.889
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 6.47e+06
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.208


ADV1:  -0.0032553664924736166 0.02805862203276863 0.17896469394062353 -0.3071648331584664
ADV2:  0.03967386317939091 0.7391014571651434 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4725   0.8451   3.3612  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0041   0.0020   0.0086   1.9276   0.8688   0.6518
***** Episode 33511, Mean R = 189.9  Std R = 7.3  Min R = 179.4
PolicyLoss: -0.0256
Policy_Beta: 0.1
Policy_Entropy: 0.24


optflow_error |  0.0253 |  0.5989 |  0.0000 | 42.9343
pixel_icoords |    0.14    0.21 |    6.73    6.58 |  -16.57  -15.02 |   16.31   15.54
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  193.09 |    7.13 |  180.00 |  215.00
***** Episode 33790, Mean R = 193.4  Std R = 6.4  Min R = 181.7
PolicyLoss: -0.00241
Policy_Beta: 0.1
Policy_Entropy: 0.233
Policy_KL: 0.00205
Policy_SD: 0.889
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 6.53e+06
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.224


ADV1:  -0.0010953557645795818 0.027428611859266424 0.2512457339848748 -0.25685604046946775
ADV2:  0.004511430632994374 0.7269477750660038 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1840   0.6657   3.6892  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0006   0.0034   1.9276   0.8688   0.6518
***** Episode 33821, Mean R = 190.4  Std R = 5.8  Min R = 178.5
PolicyLoss: -0.00196
Policy_Beta: 0.1
Policy_Entropy: 

pixel_icoords |   -0.56   -0.02 |    6.48    6.70 |  -14.09  -14.30 |   17.87   16.12
theta_cv |    0.05 |    0.04 |    0.00 |    0.26
steps    |  193.46 |    6.81 |  180.00 |  213.00
***** Episode 34100, Mean R = 191.7  Std R = 5.5  Min R = 179.8
PolicyLoss: -0.0212
Policy_Beta: 0.1
Policy_Entropy: 0.233
Policy_KL: 0.00143
Policy_SD: 0.892
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 6.59e+06
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.146


ADV1:  -0.001107219627006858 0.023766155543746826 0.1734921714799581 -0.3035163362420068
ADV2:  0.022446585135815163 0.732728469210083 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4625   1.0637   5.5720  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0053   0.0022   0.0100   1.9276   0.8688   0.6518
***** Episode 34131, Mean R = 189.7  Std R = 5.7  Min R = 178.7
PolicyLoss: -0.0217
Policy_Beta: 0.1
Policy_Entropy: 0.233
Policy_KL: 0.00137
Policy_SD: 0.895
Policy_lr_mult: 1


***** Episode 34410, Mean R = 189.8  Std R = 6.5  Min R = 179.3
PolicyLoss: -0.0297
Policy_Beta: 0.1
Policy_Entropy: 0.24
Policy_KL: 0.00119
Policy_SD: 0.889
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 6.65e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.123


ADV1:  0.0015729772065637983 0.025128880246328256 0.21578245128952445 -0.24309503487786177
ADV2:  -0.017349642061191644 0.7069541124192946 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2810   0.7062   3.7149  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0059   0.0024   0.0093   1.9276   0.8688   0.6518
***** Episode 34441, Mean R = 193.3  Std R = 6.6  Min R = 181.9
PolicyLoss: 0.00413
Policy_Beta: 0.1
Policy_Entropy: 0.253
Policy_KL: 0.000885
Policy_SD: 0.893
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 6.66e+06
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.182


Dynamics: Max Disturbance (m/s^2):  [1.59098505e-12 3.18466002e-13 

tracking_error |  0.0040 |  0.0141 |  0.0000 |  1.1540
optflow_error |  0.0247 |  0.5928 |  0.0000 | 52.6676
pixel_icoords |   -0.05    0.16 |    6.75    6.35 |  -16.56  -15.39 |   16.72   14.22
theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  193.26 |    6.70 |  179.00 |  211.00
***** Episode 34720, Mean R = 191.9  Std R = 7.8  Min R = 180.5
PolicyLoss: -0.0151
Policy_Beta: 0.1
Policy_Entropy: 0.228
Policy_KL: 0.00103
Policy_SD: 0.879
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 6.71e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.23


ADV1:  0.0006025983010427003 0.02501232053902278 0.21926256701602354 -0.27614412210142414
ADV2:  -0.0008759246836985606 0.7012632712576516 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2214   0.8516   4.5293  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0032   0.0015   0.0067   1.9276   0.8688   0.6518
***** Episode 34751, Mean R = 190.8  Std R = 7.4  Min R = 178.5
Pol

tracking_error |  0.0040 |  0.0136 |  0.0000 |  1.0234
optflow_error |  0.0223 |  0.5347 |  0.0000 | 47.9509
pixel_icoords |   -0.09    0.01 |    6.46    6.82 |  -13.71  -16.03 |   14.97   15.55
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  193.01 |    7.22 |  180.00 |  216.00
***** Episode 35030, Mean R = 190.1  Std R = 7.3  Min R = 178.9
PolicyLoss: -0.0265
Policy_Beta: 0.1
Policy_Entropy: 0.222
Policy_KL: 0.00104
Policy_SD: 0.881
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 6.77e+06
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.154


ADV1:  -0.001671437689336929 0.025648252143708725 0.2161560063898913 -0.3268005372402027
ADV2:  0.033429188857442 0.6598388905089576 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3054   0.8281   4.5509  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0202   0.0100   0.0407   1.9276   0.8688   0.6518
***** Episode 35061, Mean R = 189.7  Std R = 6.9  Min R = 178.0
Polic

optflow_error |  0.0215 |  0.5276 |  0.0000 | 45.6931
pixel_icoords |    0.37    0.28 |    6.66    6.52 |  -17.55  -15.20 |   17.37   15.41
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  192.41 |    6.72 |  179.00 |  212.00
***** Episode 35340, Mean R = 192.0  Std R = 7.1  Min R = 181.8
PolicyLoss: 0.0167
Policy_Beta: 0.1
Policy_Entropy: 0.224
Policy_KL: 0.00174
Policy_SD: 0.887
Policy_lr_mult: 1
Steps: 6.00e+03
TotalSteps: 6.83e+06
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.209


ADV1:  -0.001155471697920388 0.023266530435723798 0.18544246087759142 -0.3402186702165162
ADV2:  0.022624390356724467 0.6781051305273998 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0789   0.6652   2.6067  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0015   0.0073   1.9276   0.8688   0.6518
***** Episode 35371, Mean R = 191.3  Std R = 6.8  Min R = 180.8
PolicyLoss: -0.0162
Policy_Beta: 0.1
Policy_Entropy: 0.23

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |   -0.03    0.03 |    0.29    1.88 |   -0.65   -3.13 |    0.58    3.08
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  191.52 |    7.50 |  173.22 |  212.40
tracking_error |  0.0038 |  0.0136 |  0.0000 |  1.1737
optflow_error |  0.0228 |  0.5790 |  0.0000 | 52.8557
pixel_icoords |   -0.65    0.53 |    7.18    6.50 |  -16.90  -15.76 |   15.03   15.38
theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  193.16 |    7.42 |  179.00 |  214.00
***** Episode 35650, Mean R = 192.3  Std R = 6.6  Min R = 177.5
PolicyLoss: -0.0126
Policy_Beta: 0.1
Policy_Entropy: 0.233
Policy_KL: 0.001

theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.64 |    6.80 |  179.00 |  217.00
***** Episode 35960, Mean R = 190.8  Std R = 4.8  Min R = 182.7
PolicyLoss: -0.00862
Policy_Beta: 0.1
Policy_Entropy: 0.241
Policy_KL: 0.000999
Policy_SD: 0.892
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 6.95e+06
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.0965


ADV1:  0.0028447431107846894 0.02636357537795894 0.286643489610273 -0.2438896187038872
ADV2:  -0.034652681921882776 0.7187705113454217 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5702   0.4845   2.3991  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0125   0.0051   0.0211   1.9276   0.8688   0.6518
***** Episode 35991, Mean R = 191.5  Std R = 5.0  Min R = 180.1
PolicyLoss: 0.0253
Policy_Beta: 0.1
Policy_Entropy: 0.238
Policy_KL: 0.000932
Policy_SD: 0.891
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 6.96e+06
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld

tracking_error |  0.0036 |  0.0119 |  0.0000 |  0.8838
optflow_error |  0.0200 |  0.4689 |  0.0000 | 38.3521
pixel_icoords |    0.50   -0.31 |    6.93    6.97 |  -14.96  -16.03 |   17.16   15.90
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.25 |    6.94 |  179.00 |  216.00
***** Episode 36270, Mean R = 190.7  Std R = 6.7  Min R = 179.8
PolicyLoss: -0.0287
Policy_Beta: 0.1
Policy_Entropy: 0.234
Policy_KL: 0.00128
Policy_SD: 0.892
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 7.01e+06
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.0984


Dynamics: Max Disturbance (m/s^2):  [1.59098505e-12 3.18466002e-13 2.13438271e-12] 2.681089994997043e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.50880795e-13 2.68899340e-12] 3.065768677351602e-12
ADV1:  0.0018936537103785797 0.02684335793792652 0.23269674353708014 -0.25630431967666867
ADV2:  -0.022485549879026942 0.6857273095151594 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2494   

tracking_error |  0.0032 |  0.0119 |  0.0000 |  0.9477
optflow_error |  0.0206 |  0.4495 |  0.0000 | 38.2009
pixel_icoords |    0.22   -0.29 |    7.25    7.08 |  -16.61  -16.13 |   15.23   15.22
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  194.39 |    7.91 |  180.00 |  223.00
***** Episode 36580, Mean R = 193.3  Std R = 7.0  Min R = 181.2
PolicyLoss: -0.0101
Policy_Beta: 0.1
Policy_Entropy: 0.232
Policy_KL: 0.00128
Policy_SD: 0.886
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 7.07e+06
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.206


ADV1:  -0.0007662098494800178 0.02345320610810117 0.20090076452295402 -0.26638700876459215
ADV2:  0.032695370515848046 0.7124893287202982 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5795   0.7522   3.5469  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0032   0.0011   0.0057   1.9276   0.8688   0.6518
***** Episode 36611, Mean R = 191.6  Std R = 5.9  Min R = 180.4


optflow_error |  0.0229 |  0.5721 |  0.0000 | 44.7962
pixel_icoords |   -0.69   -0.09 |    6.79    6.40 |  -17.58  -15.14 |   16.08   14.75
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  192.82 |    6.42 |  180.00 |  216.00
***** Episode 36890, Mean R = 190.6  Std R = 6.4  Min R = 178.8
PolicyLoss: -0.00705
Policy_Beta: 0.1
Policy_Entropy: 0.234
Policy_KL: 0.00148
Policy_SD: 0.887
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 7.13e+06
VF_0_ExplainedVarNew: 0.962
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.153


ADV1:  0.0013492987366755475 0.021777642628897483 0.15202495455741882 -0.23080542186611513
ADV2:  0.007162495721698261 0.7351732737793326 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9888   0.5130   2.5858  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0011   0.0046   1.9276   0.8688   0.6518
***** Episode 36921, Mean R = 189.6  Std R = 6.4  Min R = 178.6
PolicyLoss: -0.0166
Policy_Beta: 0.1
Policy_Entropy: 0

a_f      |    0.02   -0.06 |    0.28    1.87 |   -0.66   -3.10 |    0.57    3.14
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  191.48 |    7.12 |  174.19 |  213.60
tracking_error |  0.0036 |  0.0122 |  0.0000 |  0.9681
optflow_error |  0.0213 |  0.4822 |  0.0000 | 43.1502
pixel_icoords |    0.50   -0.40 |    6.81    6.42 |  -17.27  -16.50 |   15.94   18.33
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.15 |    6.98 |  180.00 |  215.00
***** Episode 37200, Mean R = 190.2  Std R = 5.4  Min R = 178.9
PolicyLoss: -0.00853
Policy_Beta: 0.1
Policy_Entropy: 0.231
Policy_KL: 0.000357
Policy_SD: 0.888
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 7.19e+06
VF_0_ExplainedVarNew: 0.979
VF_0_Exp

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.01    0.00 |    0.27    1.73 |   -0.63   -3.10 |    0.61    3.11
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  191.41 |    7.54 |  177.17 |  213.32
tracking_error |  0.0036 |  0.0153 |  0.0000 |  1.1706
optflow_error |  0.0251 |  0.6182 |  0.0000 | 53.2928
pixel_icoords |    0.06    0.56 |    6.93    6.71 |  -17.11  -14.98 |   15.57   17.84
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  193.13 |    7.46 |  180.00 |  215.00
***** Episode 37510, Mean R = 190.3  Std R = 7.1  Min R = 178.5
PolicyLoss: 0.00598
Policy_Beta: 0.1
Policy_Entropy: 0.249
Policy_KL: 0.000

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.01    0.07 |    0.26    1.78 |   -0.58   -3.13 |    0.61    3.14
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  191.46 |    7.62 |  176.50 |  216.46
tracking_error |  0.0036 |  0.0142 |  0.0000 |  1.3325
optflow_error |  0.0251 |  0.5975 |  0.0000 | 59.1561
pixel_icoords |    0.24   -0.23 |    6.66    6.76 |  -15.05  -17.34 |   15.23   17.51
theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  193.12 |    7.63 |  180.00 |  219.00
***** Episode 37820, Mean R = 192.9  Std R = 9.3  Min R = 181.4
PolicyLoss: 0.0206
Policy_Beta: 0.1
Policy_Entropy: 0.256
Policy_KL: 0.0007

pixel_icoords |    0.23   -0.22 |    7.01    6.95 |  -14.28  -16.58 |   17.05   15.59
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  193.43 |    6.90 |  180.00 |  215.00
***** Episode 38130, Mean R = 191.1  Std R = 8.3  Min R = 178.3
PolicyLoss: -0.0181
Policy_Beta: 0.1
Policy_Entropy: 0.252
Policy_KL: 0.000695
Policy_SD: 0.893
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 7.37e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.167


ADV1:  0.002291188136989011 0.02393971867255417 0.2371533794177202 -0.24672081945572433
ADV2:  -0.02476455131158665 0.7020727986785908 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2312   0.9693   4.8855  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0054   0.0025   0.0115   1.9276   0.8688   0.6518
***** Episode 38161, Mean R = 192.0  Std R = 8.1  Min R = 178.8
PolicyLoss: 0.00409
Policy_Beta: 0.1
Policy_Entropy: 0.256
Policy_KL: 0.000925
Policy_SD: 0.894
Policy_lr_mult:

tracking_error |  0.0036 |  0.0133 |  0.0000 |  1.0112
optflow_error |  0.0240 |  0.5578 |  0.0000 | 47.7091
pixel_icoords |   -0.74    0.39 |    6.95    6.47 |  -16.26  -17.57 |   15.45   18.68
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.46 |    7.18 |  180.00 |  220.00
***** Episode 38440, Mean R = 192.2  Std R = 9.2  Min R = 177.8
PolicyLoss: -0.00436
Policy_Beta: 0.1
Policy_Entropy: 0.252
Policy_KL: 0.00111
Policy_SD: 0.894
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 7.43e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.187


ADV1:  -5.063884780330296e-06 0.022938671350979135 0.20230061467542207 -0.27574201338345433
ADV2:  0.0004211993721644677 0.7054714479476067 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7966   0.3775   1.5422  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0004   0.0021   1.9276   0.8688   0.6518
***** Episode 38471, Mean R = 190.3  Std R = 6.3  Min R = 178

optflow_error |  0.0201 |  0.4612 |  0.0000 | 43.8767
pixel_icoords |   -0.05   -0.01 |    6.87    6.04 |  -15.99  -14.99 |   15.96   15.29
theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  192.87 |    6.70 |  180.00 |  210.00
***** Episode 38750, Mean R = 191.4  Std R = 6.9  Min R = 178.3
PolicyLoss: -0.0396
Policy_Beta: 0.1
Policy_Entropy: 0.25
Policy_KL: 0.00097
Policy_SD: 0.887
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 7.49e+06
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.144


ADV1:  -0.0012573855175982938 0.018738414260028494 0.12826592925967029 -0.1986049020945222
ADV2:  0.016451525802110845 0.7315159254469743 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8854   0.6198   3.3591  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0080   0.0060   0.0182   1.9276   0.8688   0.6518
***** Episode 38781, Mean R = 190.5  Std R = 6.7  Min R = 179.9
PolicyLoss: -0.0102
Policy_Beta: 0.1
Policy_Entropy: 0.2

pixel_icoords |    0.11    0.46 |    6.50    6.47 |  -15.59  -15.44 |   16.08   17.34
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  192.83 |    6.62 |  180.00 |  219.00
***** Episode 39060, Mean R = 191.1  Std R = 5.5  Min R = 178.6
PolicyLoss: -0.0263
Policy_Beta: 0.1
Policy_Entropy: 0.244
Policy_KL: 0.00114
Policy_SD: 0.882
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 7.55e+06
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.0672


ADV1:  0.0010387474571379573 0.022585177706428927 0.22966911883374785 -0.20194724086240565
ADV2:  -0.00772989711542487 0.7004828455675186 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7731   0.5113   1.9536  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0010   0.0045   1.9276   0.8688   0.6518
***** Episode 39091, Mean R = 191.4  Std R = 7.5  Min R = 178.8
PolicyLoss: -0.00215
Policy_Beta: 0.1
Policy_Entropy: 0.254
Policy_KL: 0.000715
Policy_SD: 0.889
Policy_lr_m

tracking_error |  0.0036 |  0.0107 |  0.0000 |  0.9162
optflow_error |  0.0185 |  0.3888 |  0.0000 | 42.7016
pixel_icoords |   -0.51    0.55 |    6.74    6.70 |  -16.09  -16.16 |   16.12   16.29
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  193.03 |    7.10 |  180.00 |  215.00
***** Episode 39370, Mean R = 191.0  Std R = 6.5  Min R = 178.7
PolicyLoss: -0.0403
Policy_Beta: 0.1
Policy_Entropy: 0.251
Policy_KL: 0.00136
Policy_SD: 0.888
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 7.61e+06
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.193


ADV1:  -0.0003755064837483002 0.021870962971896668 0.1839429501684115 -0.29889004647671014
ADV2:  -0.002305881640310002 0.7218252357260686 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0654   0.5120   2.4124  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0008   0.0041   1.9276   0.8688   0.6518
***** Episode 39401, Mean R = 190.3  Std R = 6.3  Min R = 180.3


theta_cv |    0.05 |    0.04 |    0.00 |    0.26
steps    |  192.36 |    6.64 |  180.00 |  212.00
***** Episode 39680, Mean R = 190.4  Std R = 6.9  Min R = 177.6
PolicyLoss: -0.0111
Policy_Beta: 0.1
Policy_Entropy: 0.255
Policy_KL: 0.00116
Policy_SD: 0.887
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 7.67e+06
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.151


Dynamics: Max Disturbance (m/s^2):  [1.59578370e-12 3.18466002e-13 2.13438271e-12] 2.6839403379532313e-12
Dynamics: Max Disturbance (m/s^2):  [1.45096896e-12 2.50880795e-13 2.68899340e-12] 3.065768677351602e-12
ADV1:  0.001535368746960512 0.018148647951147713 0.15797838082223514 -0.1576821668710765
ADV2:  -0.02204982963147172 0.7289199443694502 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6802   1.0484   4.2374  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0044   0.0023   0.0087   1.9276   0.8688   0.6518
***** Episode 39711, Mean R = 191.4  Std R = 6.5  

tracking_error |  0.0036 |  0.0117 |  0.0000 |  0.8642
optflow_error |  0.0197 |  0.4410 |  0.0000 | 37.6329
pixel_icoords |   -0.18    0.44 |    6.73    6.84 |  -15.27  -17.93 |   16.34   16.79
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.20 |    7.30 |  180.00 |  222.00
***** Episode 39990, Mean R = 191.3  Std R = 6.6  Min R = 179.8
PolicyLoss: -0.00111
Policy_Beta: 0.1
Policy_Entropy: 0.26
Policy_KL: 0.00138
Policy_SD: 0.893
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 7.73e+06
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.168


ADV1:  0.0013431717994081998 0.021386434525881588 0.22583984363242637 -0.14997972329653886
ADV2:  -0.03932420122473843 0.6833874117202707 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4847   0.2258   0.9882  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0070   0.0044   0.0150   1.9276   0.8688   0.6518
***** Episode 40021, Mean R = 194.7  Std R = 6.6  Min R = 182.3
P

tracking_error |  0.0036 |  0.0125 |  0.0000 |  1.0042
optflow_error |  0.0227 |  0.5235 |  0.0000 | 47.1196
pixel_icoords |    0.17    0.71 |    6.93    6.96 |  -16.83  -14.88 |   17.58   16.56
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.70 |    6.96 |  180.00 |  218.00
***** Episode 40300, Mean R = 192.6  Std R = 7.3  Min R = 178.7
PolicyLoss: -0.0145
Policy_Beta: 0.1
Policy_Entropy: 0.246
Policy_KL: 0.000847
Policy_SD: 0.887
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 7.79e+06
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.161


ADV1:  0.00010452858437086222 0.01916660454928672 0.1483716706817919 -0.2626919682114086
ADV2:  0.0063496102674630624 0.6941560865050669 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2079   0.7408   2.8177  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0003   0.0013   1.9276   0.8688   0.6518
***** Episode 40331, Mean R = 194.0  Std R = 8.6  Min R = 179.8


theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  192.96 |    7.73 |  179.00 |  228.00
***** Episode 40610, Mean R = 192.3  Std R = 6.7  Min R = 182.5
PolicyLoss: -0.000469
Policy_Beta: 0.1
Policy_Entropy: 0.237
Policy_KL: 0.00158
Policy_SD: 0.883
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 7.85e+06
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.166


Dynamics: Max Disturbance (m/s^2):  [1.59578370e-12 3.18466002e-13 2.13438271e-12] 2.6839403379532313e-12
Dynamics: Max Disturbance (m/s^2):  [1.45649142e-12 2.50880795e-13 2.68899340e-12] 3.0683862075048843e-12
ADV1:  -0.0009624218486998435 0.018165725308937262 0.1389421366519683 -0.19611236100057472
ADV2:  0.02761771724513374 0.7045110120865753 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5691   0.7431   3.6053  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0017   0.0076   1.9276   0.8688   0.6518
***** Episode 40641, Mean R = 192.1  Std R = 7

tracking_error |  0.0034 |  0.0127 |  0.0000 |  1.2155
optflow_error |  0.0209 |  0.5453 |  0.0000 | 56.6981
pixel_icoords |   -0.45    0.26 |    6.75    7.23 |  -15.69  -16.63 |   15.81   15.85
theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  192.99 |    7.23 |  179.00 |  215.00
***** Episode 40920, Mean R = 190.0  Std R = 7.7  Min R = 179.4
PolicyLoss: -0.0278
Policy_Beta: 0.1
Policy_Entropy: 0.238
Policy_KL: 0.000938
Policy_SD: 0.881
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 7.91e+06
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.176


ADV1:  0.003988781942383174 0.02032948166730902 0.18715955976557974 -0.15310683190619334
ADV2:  -0.05555863473223569 0.7050566977578012 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.8059   0.9306   4.1360  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0135   0.0073   0.0279   1.9276   0.8688   0.6518
***** Episode 40951, Mean R = 191.4  Std R = 6.8  Min R = 178.9
P

a_f      |    0.04    0.02 |    0.27    1.79 |   -0.60   -3.13 |    0.65    3.12
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.03 |    0.57 |    0.00 |   10.00
tracking_rewards |  190.81 |    7.23 |  173.12 |  216.74
tracking_error |  0.0037 |  0.0137 |  0.0000 |  1.1258
optflow_error |  0.0212 |  0.5277 |  0.0000 | 50.5697
pixel_icoords |    0.92    0.68 |    6.72    6.47 |  -16.09  -15.51 |   17.76   14.87
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  192.50 |    7.11 |  179.00 |  218.00
***** Episode 41230, Mean R = 191.2  Std R = 5.2  Min R = 181.8
PolicyLoss: 0.0259
Policy_Beta: 0.1
Policy_Entropy: 0.253
Policy_KL: 0.00128
Policy_SD: 0.889
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 7.97e+06
VF_0_ExplainedVarNew: 0.986
VF_0_Explai

theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  193.33 |    7.22 |  180.00 |  219.00
***** Episode 41540, Mean R = 190.8  Std R = 7.0  Min R = 180.6
PolicyLoss: -0.0141
Policy_Beta: 0.1
Policy_Entropy: 0.241
Policy_KL: 0.000991
Policy_SD: 0.885
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 8.03e+06
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.123


ADV1:  0.003065720190038747 0.022528041488625743 0.19031616482754088 -0.16475225680817518
ADV2:  -0.050392764103864646 0.7226969607932323 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1457   0.6739   3.3485  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0063   0.0034   0.0141   1.9276   0.8688   0.6518
***** Episode 41571, Mean R = 193.1  Std R = 8.5  Min R = 181.5
PolicyLoss: 0.018
Policy_Beta: 0.1
Policy_Entropy: 0.243
Policy_KL: 0.00111
Policy_SD: 0.875
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 8.04e+06
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarO

optflow_error |  0.0188 |  0.4540 |  0.0000 | 48.5670
pixel_icoords |    0.47   -0.46 |    6.67    6.59 |  -14.68  -17.04 |   16.12   15.37
theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  193.33 |    6.96 |  180.00 |  217.00
***** Episode 41850, Mean R = 191.8  Std R = 6.8  Min R = 181.8
PolicyLoss: -0.0229
Policy_Beta: 0.1
Policy_Entropy: 0.247
Policy_KL: 0.00108
Policy_SD: 0.877
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 8.09e+06
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.187


ADV1:  -0.001459045053653755 0.01823149743374259 0.11482527420001354 -0.2814419501687293
ADV2:  0.02959820053001308 0.7586919369724062 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.3167   3.8948  15.0687  18.8500   9.1675   5.2382
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0009   0.0036   1.9276   0.8688   0.6518
***** Episode 41881, Mean R = 192.5  Std R = 6.1  Min R = 181.5
PolicyLoss: -0.0242
Policy_Beta: 0.1
Policy_Entropy: 0.236
Po

optflow_error |  0.0228 |  0.6053 |  0.0000 | 52.8730
pixel_icoords |    0.35    0.06 |    6.67    6.89 |  -16.50  -15.84 |   16.82   17.79
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  193.64 |    7.74 |  180.00 |  222.00
***** Episode 42160, Mean R = 192.0  Std R = 5.9  Min R = 180.9
PolicyLoss: 0.0103
Policy_Beta: 0.1
Policy_Entropy: 0.255
Policy_KL: 0.00134
Policy_SD: 0.883
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 8.15e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.12


ADV1:  -0.0021263203812442095 0.019221142610793947 0.26092665605936227 -0.18662463095569892
ADV2:  0.028351049144971673 0.6749768686522462 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.2746   2.2547   8.6457  18.8500   9.1675   6.1659
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0032   0.0014   0.0069   1.9276   0.8688   0.6518
***** Episode 42191, Mean R = 190.9  Std R = 5.9  Min R = 180.5
PolicyLoss: -0.0137
Policy_Beta: 0.1
Policy_Entropy: 0.2

***** Episode 42470, Mean R = 191.2  Std R = 6.4  Min R = 179.7
PolicyLoss: -0.00886
Policy_Beta: 0.1
Policy_Entropy: 0.234
Policy_KL: 0.000798
Policy_SD: 0.88
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 8.21e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.0922


ADV1:  -0.00503968284708723 0.026090864629554317 0.2862741774830053 -0.331731247344361
ADV2:  0.042139587645916764 0.693789420044409 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4067   0.8909   4.6656  18.8500   9.1675   6.1659
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0059   0.0034   0.0124   1.9276   0.8688   0.6518
***** Episode 42501, Mean R = 188.3  Std R = 6.6  Min R = 172.5
PolicyLoss: -0.0241
Policy_Beta: 0.1
Policy_Entropy: 0.263
Policy_KL: 0.00101
Policy_SD: 0.895
Policy_lr_mult: 1
Steps: 5.89e+03
TotalSteps: 8.22e+06
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.106


ADV1:  0.001505463839730754 0.02022211649485915 0.1685098218400769 -0.

tracking_error |  0.0033 |  0.0132 |  0.0000 |  0.9498
optflow_error |  0.0227 |  0.5556 |  0.0000 | 43.7693
pixel_icoords |    0.13    0.08 |    6.91    6.89 |  -16.09  -17.02 |   16.70   17.92
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  193.37 |    7.50 |  179.00 |  223.00
***** Episode 42780, Mean R = 192.2  Std R = 7.9  Min R = 178.7
PolicyLoss: -0.0225
Policy_Beta: 0.1
Policy_Entropy: 0.251
Policy_KL: 0.000993
Policy_SD: 0.892
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 8.27e+06
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.143


ADV1:  -0.000775666647794473 0.01895380752331643 0.13070604143830944 -0.15925002747606665
ADV2:  0.01715115108223079 0.7545857823142655 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9676   0.5494   2.8887  18.8500   9.1675   6.1659
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0007   0.0033   1.9276   0.8688   0.6518
***** Episode 42811, Mean R = 189.9  Std R = 6.2  Min R = 180.8
P

tracking_error |  0.0033 |  0.0110 |  0.0000 |  0.9341
optflow_error |  0.0188 |  0.4616 |  0.0000 | 42.7544
pixel_icoords |    0.79   -0.01 |    6.54    6.69 |  -14.89  -14.49 |   15.43   15.27
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  192.64 |    6.54 |  180.00 |  210.00
***** Episode 43090, Mean R = 190.6  Std R = 6.2  Min R = 181.8
PolicyLoss: -0.00723
Policy_Beta: 0.1
Policy_Entropy: 0.255
Policy_KL: 0.00147
Policy_SD: 0.886
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 8.33e+06
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.15


Dynamics: Max Disturbance (m/s^2):  [1.59578370e-12 3.18466002e-13 2.13438271e-12] 2.6839403379532313e-12
Dynamics: Max Disturbance (m/s^2):  [1.45649142e-12 2.50880795e-13 2.68899340e-12] 3.0683862075048843e-12
ADV1:  0.0016829806474197048 0.019888655685933312 0.14857924321555938 -0.2336155036895206
ADV2:  -0.011378176655695946 0.740525130953208 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4813  

optflow_error |  0.0181 |  0.4014 |  0.0000 | 39.8125
pixel_icoords |    0.48    0.01 |    6.72    6.71 |  -15.74  -16.86 |   16.05   16.17
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  192.83 |    7.05 |  180.00 |  214.00
***** Episode 43400, Mean R = 191.3  Std R = 5.9  Min R = 180.9
PolicyLoss: -0.00318
Policy_Beta: 0.1
Policy_Entropy: 0.262
Policy_KL: 0.000415
Policy_SD: 0.895
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 8.39e+06
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.0651


ADV1:  0.002327913454965824 0.018543695021897167 0.12364499844116805 -0.22969800440758759
ADV2:  -0.024390724689910517 0.7184577940939264 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1490   0.8990   4.5783  18.8500   9.1675   6.1659
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0036   0.0019   0.0080   1.9276   0.8688   0.6518
***** Episode 43431, Mean R = 193.6  Std R = 7.0  Min R = 181.8
PolicyLoss: -0.00698
Policy_Beta: 0.1
Policy_Entropy

tracking_error |  0.0032 |  0.0111 |  0.0000 |  0.8086
optflow_error |  0.0194 |  0.4705 |  0.0000 | 37.2135
pixel_icoords |   -0.17    0.45 |    6.79    6.29 |  -16.20  -16.87 |   13.60   15.79
theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  193.06 |    6.67 |  179.00 |  215.00
***** Episode 43710, Mean R = 191.8  Std R = 7.3  Min R = 177.8
PolicyLoss: -0.0248
Policy_Beta: 0.1
Policy_Entropy: 0.232
Policy_KL: 0.00105
Policy_SD: 0.878
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 8.45e+06
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.146


ADV1:  -0.00032035434506877294 0.017231786985152184 0.16726022713622646 -0.213120156725912
ADV2:  0.009110492114844012 0.7221725315515938 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.7868   1.8776   7.2946  18.8500   9.1675   6.1659
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0029   1.9276   0.8688   0.6518
***** Episode 43741, Mean R = 191.3  Std R = 7.0  Min R = 179.8


w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  191.17 |    7.02 |  170.16 |  209.62
tracking_error |  0.0036 |  0.0120 |  0.0000 |  1.2304
optflow_error |  0.0174 |  0.4557 |  0.0000 | 56.1041
pixel_icoords |    0.72   -0.11 |    6.69    6.84 |  -17.84  -16.65 |   14.75   16.66
theta_cv |    0.05 |    0.04 |    0.00 |    0.27
steps    |  192.79 |    6.83 |  180.00 |  211.00
***** Episode 44020, Mean R = 191.7  Std R = 6.8  Min R = 170.2
PolicyLoss: 0.00543
Policy_Beta: 0.1
Policy_Entropy: 0.254
Policy_KL: 0.000935
Policy_SD: 0.885
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 8.51e+06
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.119


Dynamics: Max Disturbance (m/s^2):  [1.595783

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |   -0.03   -0.04 |    0.27    1.82 |   -0.63   -3.13 |    0.59    3.10
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  192.18 |    7.09 |  178.39 |  212.38
tracking_error |  0.0035 |  0.0110 |  0.0000 |  0.8330
optflow_error |  0.0183 |  0.4427 |  0.0000 | 38.9350
pixel_icoords |   -0.65   -0.10 |    6.85    6.86 |  -16.20  -16.21 |   15.34   16.17
theta_cv |    0.05 |    0.04 |    0.00 |    0.26
steps    |  193.77 |    7.06 |  180.00 |  214.00
***** Episode 44330, Mean R = 193.2  Std R = 4.8  Min R = 179.2
PolicyLoss: 0.00255
Policy_Beta: 0.1
Policy_Entropy: 0.252
Policy_KL: 0.000

tracking_error |  0.0036 |  0.0126 |  0.0000 |  1.0205
optflow_error |  0.0202 |  0.5127 |  0.0000 | 46.5975
pixel_icoords |   -0.04    0.35 |    6.59    7.43 |  -15.87  -17.62 |   16.11   16.70
theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  193.83 |    7.82 |  179.00 |  222.00
***** Episode 44640, Mean R = 192.3  Std R = 6.5  Min R = 181.8
PolicyLoss: -0.0181
Policy_Beta: 0.1
Policy_Entropy: 0.248
Policy_KL: 0.0014
Policy_SD: 0.88
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 8.63e+06
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.144


ADV1:  0.0001662720076011896 0.01682935296818332 0.13090812482588426 -0.15008822045729242
ADV2:  0.01092139756339571 0.7433983139553546 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.2876   1.3509   6.4563  18.8500   9.1675   6.1659
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0003   0.0015   1.9276   0.8688   0.6518
***** Episode 44671, Mean R = 191.1  Std R = 5.5  Min R = 179.8
Poli

KeyboardInterrupt: 

In [7]:
# Persist the current training state, passing the same name `fname` to each
# save call: policy parameters, value-function parameters, and the
# statistics history held by env.rl_stats.
# NOTE(review): the on-disk format/location is decided by the respective
# save_* methods, which are not visible in this notebook.
fname = "Optimize-navp=6"
policy.save_params(fname)
value_function.save_params(fname)
env.rl_stats.save_history(fname)
# The original cell called policy.save_params(fname) a second time here; with
# an unchanged policy and the same name that call was redundant, so it was
# removed.

# Test Policy


In [6]:

# Run the batch policy test on the current agent with test_mode enabled.
# NOTE(review): the second positional argument (5000) is presumably the number
# of test runs, and print_every the reporting interval -- confirm against
# Env.test_policy_batch, which is defined outside this notebook.
env.test_policy_batch(
    agent,
    5000,
    print_every=100,
    test_mode=True,
)

worked 1
Dynamics: Max Disturbance (m/s^2):  [1.59710130e-12 2.93244415e-13 2.12523559e-12] 2.6745749494614436e-12
Dynamics: Max Disturbance (m/s^2):  [1.47542718e-12 2.56524096e-13 2.69091163e-12] 3.0795609059722763e-12
i (et): 100  (             324)
Cumulative Stats (mean,std,max,argmax)
thrust   |1926.84 |2578.99 |  0.00 |6936.72 |     1
 
Final Stats (mean,std,min,max)
hit_reward |    10.0 |     0.0 |    10.0 |    10.0
hit_100cm |    1.00 |    0.00 |    1.00 |    1.00
hit_50cm |    1.00 |    0.00 |    1.00 |    1.00
norm_vf  |6808.611 | 150.095 |6294.176 |7013.267
norm_rf  |     0.3 |     0.1 |     0.1 |     0.5
position | 30671.3    55.8 50014.3 |  1031.8  3006.5  3326.3 | 28412.9 -5374.5 44696.4 | 34123.6  5383.3 55818.8
velocity |-2780.258  -4.287  88.044 | 167.454 829.564 780.219 |-3021.893-1567.311-1392.842 |-2273.2531690.4811699.630
fuel     |  7.89 |  2.80 |  2.48 | 12.77
attitude_321 |  -0.089  -0.020   0.107 |   2.894   0.264   1.716 |  -3.139  -0.572  -3.110 |   3.120   

i (et): 900  (             328)
Cumulative Stats (mean,std,max,argmax)
thrust   |1942.52 |2572.57 |  0.00 |6936.72 |   217
 
Final Stats (mean,std,min,max)
hit_reward |     9.9 |     0.8 |     0.0 |    10.0
hit_100cm |    1.00 |    0.00 |    1.00 |    1.00
hit_50cm |    0.99 |    0.08 |    0.00 |    1.00
norm_vf  |6822.325 | 156.573 |5983.956 |7020.315
norm_rf  |     0.3 |     0.1 |     0.0 |     0.5
position | 30520.6   101.0 49814.4 |  1081.3  3092.5  3080.0 | 28131.8 -5737.1 44226.0 | 34298.0  5922.8 55818.8
velocity |-2792.346  44.910 -43.688 | 170.655 787.527 780.803 |-3021.893-1694.737-1830.068 |-1890.5801799.2921801.263
fuel     |  7.88 |  2.68 |  2.36 | 13.82
attitude_321 |   0.174   0.013  -0.002 |   2.907   0.267   1.778 |  -3.141  -0.619  -3.137 |   3.141   0.648   3.136
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.59710130e-12 2.93244415e-13 2.12523559e-12] 2.674574949

Dynamics: Max Disturbance (m/s^2):  [1.59710130e-12 2.93244415e-13 2.12523559e-12] 2.6745749494614436e-12
Dynamics: Max Disturbance (m/s^2):  [1.47542718e-12 2.56524096e-13 2.69091163e-12] 3.0795609059722763e-12
i (et): 1700  (             339)
Cumulative Stats (mean,std,max,argmax)
thrust   |1955.99 |2579.48 |  0.00 |6936.72 |   217
 
Final Stats (mean,std,min,max)
hit_reward |     9.9 |     1.1 |     0.0 |    10.0
hit_100cm |    1.00 |    0.00 |    1.00 |    1.00
hit_50cm |    0.99 |    0.11 |    0.00 |    1.00
norm_vf  |6821.566 | 161.395 |5983.956 |7022.511
norm_rf  |     0.3 |     0.1 |     0.0 |     0.6
position | 30517.6   141.0 49950.1 |  1087.9  3100.7  3150.3 | 28131.8 -6036.7 44226.0 | 34734.9  5924.9 55826.6
velocity |-2792.956  48.870 -15.142 | 173.908 792.098 774.702 |-3022.882-1694.737-1830.068 |-1890.5801799.2921801.263
fuel     |  7.95 |  2.70 |  1.86 | 14.04
attitude_321 |   0.180   0.005  -0.013 |   2.904   0.264   1.739 |  -3.141  -0.629  -3.141 |   3.142   0.648   

Dynamics: Max Disturbance (m/s^2):  [1.59710130e-12 2.93244415e-13 2.12523559e-12] 2.6745749494614436e-12
Dynamics: Max Disturbance (m/s^2):  [1.47542718e-12 2.56524096e-13 2.69091163e-12] 3.0795609059722763e-12
i (et): 2500  (             341)
Cumulative Stats (mean,std,max,argmax)
thrust   |1945.27 |2576.80 |  0.00 |6936.72 |   217
 
Final Stats (mean,std,min,max)
hit_reward |     9.9 |     1.0 |     0.0 |    10.0
hit_100cm |    1.00 |    0.00 |    1.00 |    1.00
hit_50cm |    0.99 |    0.10 |    0.00 |    1.00
norm_vf  |6820.837 | 161.730 |5971.120 |7022.666
norm_rf  |     0.3 |     0.1 |     0.0 |     0.6
position | 30530.5    45.3 49960.8 |  1095.3  3106.7  3126.5 | 28007.4 -6104.1 43682.8 | 35427.2  5924.9 55840.4
velocity |-2791.998   6.931 -26.560 | 174.503 797.591 773.469 |-3022.882-1819.718-1830.068 |-1890.5801799.2921801.263
fuel     |  7.92 |  2.73 |  1.86 | 14.36
attitude_321 |   0.064   0.009  -0.024 |   2.906   0.265   1.734 |  -3.142  -0.635  -3.141 |   3.142   0.662   

i (et): 3300  (             326)
Cumulative Stats (mean,std,max,argmax)
thrust   |1944.18 |2575.86 |  0.00 |6936.72 |   217
 
Final Stats (mean,std,min,max)
hit_reward |     9.9 |     0.9 |     0.0 |    10.0
hit_100cm |    1.00 |    0.00 |    1.00 |    1.00
hit_50cm |    0.99 |    0.09 |    0.00 |    1.00
norm_vf  |6820.803 | 160.773 |5971.120 |7022.666
norm_rf  |     0.3 |     0.1 |     0.0 |     0.6
position | 30521.1    12.9 49978.1 |  1081.0  3103.5  3133.5 | 28007.4 -6104.1 43682.8 | 35427.2  5924.9 55840.4
velocity |-2791.906   4.083 -23.493 | 173.636 790.680 780.984 |-3022.882-1819.718-1830.068 |-1890.5801799.2921803.455
fuel     |  7.91 |  2.73 |  1.86 | 14.44
attitude_321 |   0.040   0.008  -0.026 |   2.909   0.268   1.750 |  -3.142  -0.652  -3.141 |   3.142   0.662   3.140
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.59710130e-12 2.93244415e-13 2.12523559e-12] 2.67457494

Dynamics: Max Disturbance (m/s^2):  [1.59710130e-12 2.93244415e-13 2.12523559e-12] 2.6745749494614436e-12
Dynamics: Max Disturbance (m/s^2):  [1.47751060e-12 2.56524096e-13 2.69091163e-12] 3.080559621785959e-12
i (et): 4100  (             337)
Cumulative Stats (mean,std,max,argmax)
thrust   |1941.85 |2575.15 |  0.00 |6936.72 |   217
 
Final Stats (mean,std,min,max)
hit_reward |     9.9 |     0.9 |     0.0 |    10.0
hit_100cm |    1.00 |    0.00 |    1.00 |    1.00
hit_50cm |    0.99 |    0.09 |    0.00 |    1.00
norm_vf  |6818.219 | 160.146 |5971.120 |7028.575
norm_rf  |     0.3 |     0.1 |     0.0 |     0.6
position | 30527.3     5.6 49976.1 |  1083.3  3110.2  3126.7 | 28007.4 -6104.1 43682.8 | 35427.2  5924.9 55840.4
velocity |-2789.598   2.581 -20.105 | 173.571 792.132 787.687 |-3022.882-1819.718-1830.068 |-1890.5801799.2921803.455
fuel     |  7.90 |  2.73 |  1.76 | 14.44
attitude_321 |   0.024   0.007  -0.024 |   2.908   0.271   1.754 |  -3.142  -0.652  -3.141 |   3.142   0.662   3

Dynamics: Max Disturbance (m/s^2):  [1.59710130e-12 2.93244415e-13 2.12523559e-12] 2.6745749494614436e-12
Dynamics: Max Disturbance (m/s^2):  [1.47751060e-12 2.56524096e-13 2.69091163e-12] 3.080559621785959e-12
i (et): 4900  (             298)
Cumulative Stats (mean,std,max,argmax)
thrust   |1946.26 |2576.15 |  0.00 |6936.72 |   217
 
Final Stats (mean,std,min,max)
hit_reward |     9.9 |     1.0 |     0.0 |    10.0
hit_100cm |    1.00 |    0.00 |    1.00 |    1.00
hit_50cm |    0.99 |    0.10 |    0.00 |    1.00
norm_vf  |6816.864 | 162.030 |5971.120 |7028.852
norm_rf  |     0.3 |     0.1 |     0.0 |     0.6
position | 30535.4    24.4 49985.3 |  1092.3  3120.9  3128.4 | 28007.4 -6104.1 43682.8 | 35427.2  5958.6 55840.4
velocity |-2788.408   2.990 -13.456 | 175.232 795.333 788.569 |-3028.711-1819.718-1830.068 |-1890.5801799.2921818.849
fuel     |  7.92 |  2.73 |  1.76 | 14.57
attitude_321 |   0.015   0.005  -0.021 |   2.907   0.271   1.751 |  -3.142  -0.652  -3.141 |   3.142   0.662   3

In [5]:
# Batch-evaluate `agent` in `env` with test_mode switched off.
# NOTE(review): the positional 5000 is presumably the number of test
# episodes and print_every the reporting interval -- confirm against
# Env.test_policy_batch.
env.test_policy_batch(
    agent,
    5000,
    print_every=100,
    test_mode=False,
)

worked 1
Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.74950343e-13 2.11739863e-12] 2.6615070643765496e-12
Dynamics: Max Disturbance (m/s^2):  [1.45092865e-12 2.44000809e-13 2.69017990e-12] 3.0662351949503933e-12
i (et): 100  (             217)
Cumulative Stats (mean,std,max,argmax)
thrust   |2078.14 |2629.16 |  0.00 |6936.72 |    21
 
Final Stats (mean,std,min,max)
hit_reward |     7.1 |     4.5 |     0.0 |    10.0
hit_100cm |    1.00 |    0.00 |    1.00 |    1.00
hit_50cm |    0.71 |    0.45 |    0.00 |    1.00
norm_vf  |6813.138 | 140.327 |6415.352 |7010.607
norm_rf  |     0.4 |     0.2 |     0.1 |     0.9
position | 30658.2  -431.5 49384.0 |  1101.6  2817.1  3204.7 | 28457.8 -5459.9 44487.5 | 33029.8  5254.4 55633.7
velocity |-2784.341  10.476 -48.853 | 157.753 762.764 831.041 |-2999.708-1591.469-1659.957 |-2321.4371491.1551519.196
fuel     |  8.56 |  3.28 |  2.93 | 19.73
attitude_321 |   0.155   0.022  -0.206 |   2.915   0.290   1.832 |  -3.126  -0.548  -3.121 |   3.136   

Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.74950343e-13 2.11739863e-12] 2.6615070643765496e-12
Dynamics: Max Disturbance (m/s^2):  [1.45092865e-12 2.44000809e-13 2.69017990e-12] 3.0662351949503933e-12
i (et): 900  (             213)
Cumulative Stats (mean,std,max,argmax)
thrust   |2104.62 |2635.70 |  0.00 |6936.72 |    21
 
Final Stats (mean,std,min,max)
hit_reward |     6.9 |     4.6 |     0.0 |    10.0
hit_100cm |    0.99 |    0.10 |    0.00 |    1.00
hit_50cm |    0.69 |    0.46 |    0.00 |    1.00
norm_vf  |6814.320 | 159.175 |6105.202 |7024.207
norm_rf  |     0.4 |     0.2 |     0.1 |     1.7
position | 30567.2  -173.3 49915.6 |  1098.5  3107.6  3165.9 | 28193.4 -5690.6 44223.4 | 34257.7  5626.4 55673.1
velocity |-2783.780  -5.577   3.836 | 175.444 783.606 812.233 |-3022.236-1724.052-1749.429 |-1997.8491760.6701745.312
fuel     |  8.64 |  2.95 |  1.89 | 19.73
attitude_321 |   0.020  -0.003  -0.174 |   2.911   0.281   1.795 |  -3.140  -0.643  -3.128 |   3.140   0.638   3

i (et): 1700  (             208)
Cumulative Stats (mean,std,max,argmax)
thrust   |2094.07 |2630.88 |  0.00 |6936.72 |    21
 
Final Stats (mean,std,min,max)
hit_reward |     7.1 |     4.6 |     0.0 |    10.0
hit_100cm |    0.99 |    0.11 |    0.00 |    1.00
hit_50cm |    0.71 |    0.46 |    0.00 |    1.00
norm_vf  |6819.750 | 157.418 |6062.701 |7024.207
norm_rf  |     0.4 |     0.2 |     0.1 |     2.1
position | 30515.0   -34.4 49910.5 |  1085.2  3094.3  3164.3 | 27957.6 -5786.1 44121.9 | 34257.7  5655.0 55673.1
velocity |-2792.229  -0.784   0.053 | 172.364 773.337 793.203 |-3022.236-1724.052-1777.615 |-1997.8491770.3341745.312
fuel     |  8.57 |  2.94 |  1.89 | 19.73
attitude_321 |   0.015  -0.000  -0.081 |   2.915   0.275   1.787 |  -3.141  -0.643  -3.136 |   3.140   0.702   3.128
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.74950343e-13 2.11739863e-12] 2.66150706

i (et): 2500  (             214)
Cumulative Stats (mean,std,max,argmax)
thrust   |2088.31 |2628.90 |  0.00 |6936.72 |    21
 
Final Stats (mean,std,min,max)
hit_reward |     7.0 |     4.6 |     0.0 |    10.0
hit_100cm |    0.99 |    0.11 |    0.00 |    1.00
hit_50cm |    0.70 |    0.46 |    0.00 |    1.00
norm_vf  |6818.186 | 158.171 |6062.701 |7027.536
norm_rf  |     0.4 |     0.2 |     0.0 |     3.0
position | 30520.9   -43.4 50000.7 |  1074.7  3083.8  3171.4 | 27957.6 -5786.1 44121.9 | 34257.7  5655.0 55727.2
velocity |-2790.474 -17.310   9.130 | 172.147 772.224 800.005 |-3022.236-1783.799-1777.615 |-1997.8491800.4091776.505
fuel     |  8.55 |  2.92 |  1.89 | 19.73
attitude_321 |  -0.034  -0.003  -0.069 |   2.915   0.277   1.794 |  -3.141  -0.643  -3.140 |   3.140   0.702   3.132
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.74950343e-13 2.11739863e-12] 2.66150706

i (et): 3300  (             218)
Cumulative Stats (mean,std,max,argmax)
thrust   |2089.77 |2630.88 |  0.00 |6936.72 |    21
 
Final Stats (mean,std,min,max)
hit_reward |     7.0 |     4.6 |     0.0 |    10.0
hit_100cm |    0.99 |    0.11 |    0.00 |    1.00
hit_50cm |    0.70 |    0.46 |    0.00 |    1.00
norm_vf  |6820.045 | 158.556 |6062.701 |7027.536
norm_rf  |     0.4 |     0.2 |     0.0 |     3.0
position | 30510.7   -19.8 49986.5 |  1085.1  3083.1  3151.2 | 27908.3 -5786.1 44121.9 | 34257.7  5856.2 55727.2
velocity |-2792.349  -6.836   6.130 | 172.547 770.481 794.928 |-3022.236-1783.799-1777.615 |-1975.5771800.4091875.267
fuel     |  8.56 |  2.90 |  1.79 | 19.73
attitude_321 |   0.005  -0.002  -0.040 |   2.917   0.275   1.788 |  -3.141  -0.679  -3.140 |   3.141   0.702   3.135
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.75259041e-13 2.11903340e-12] 2.66283970

i (et): 4100  (             214)
Cumulative Stats (mean,std,max,argmax)
thrust   |2091.16 |2631.13 |  0.00 |6936.72 |    21
 
Final Stats (mean,std,min,max)
hit_reward |     7.0 |     4.6 |     0.0 |    10.0
hit_100cm |    0.99 |    0.11 |    0.00 |    1.00
hit_50cm |    0.70 |    0.46 |    0.00 |    1.00
norm_vf  |6819.029 | 159.172 |6062.701 |7030.857
norm_rf  |     0.4 |     0.2 |     0.0 |     4.1
position | 30518.7   -27.6 49974.2 |  1090.0  3096.2  3142.3 | 27908.3 -5786.1 44121.9 | 34620.3  5856.2 55727.2
velocity |-2790.648  -3.150  -2.311 | 173.096 775.948 795.347 |-3022.236-1783.799-1777.756 |-1975.5771800.4091875.267
fuel     |  8.56 |  2.88 |  1.44 | 19.73
attitude_321 |   0.027   0.000  -0.042 |   2.916   0.274   1.785 |  -3.141  -0.679  -3.140 |   3.142   0.702   3.142
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.75259041e-13 2.11903340e-12] 2.66283970

i (et): 4900  (             208)
Cumulative Stats (mean,std,max,argmax)
thrust   |2096.37 |2632.75 |  0.00 |6936.72 |  4157
 
Final Stats (mean,std,min,max)
hit_reward |     7.0 |     4.6 |     0.0 |    10.0
hit_100cm |    0.99 |    0.12 |    0.00 |    1.00
hit_50cm |    0.70 |    0.46 |    0.00 |    1.00
norm_vf  |6819.593 | 159.306 |6062.701 |7031.454
norm_rf  |     0.4 |     0.2 |     0.0 |     4.1
position | 30515.6   -19.2 49991.0 |  1094.5  3085.5  3143.0 | 27908.3 -5786.1 44121.9 | 34620.3  5856.2 55928.7
velocity |-2791.030 -10.063   3.709 | 173.163 774.755 795.429 |-3025.714-1783.799-1777.756 |-1975.5771800.4091878.619
fuel     |  8.59 |  2.90 |  1.44 | 19.73
attitude_321 |   0.026  -0.002  -0.037 |   2.915   0.274   1.789 |  -3.142  -0.679  -3.140 |   3.142   0.702   3.142
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.75259041e-13 2.11903340e-12] 2.66283970

In [7]:
# Convert 0.01 degrees to radians with the project helper, then display
# sin(theta) scaled by 1000 as the cell's output.
# NOTE(review): this reads like the lateral offset (m) subtended by a
# 0.01 deg angle at 1000 m range -- confirm the intended units.
theta = envu.deg2rad(0.01)
np.sin(theta) * 1000

0.1745329243133368