In [1]:
import numpy as np
import os,sys


sys.path.append('../../../RL_lib/Agents')
sys.path.append('../../../RL_lib/Policies/PPO')
sys.path.append('../../../RL_lib/Policies/Common')
sys.path.append('../../../RL_lib/Utils')
sys.path.append('../../Env')
sys.path.append('../../Imaging')

%load_ext autoreload
%load_ext autoreload
%autoreload 2
%matplotlib nbagg
import os
print(os.getcwd())

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/Users/briangaudet/Study/Subjects/MachineLearning/Projects/RL4GNC/Exo_intercept/Experiments/Optimize_50cm


In [2]:
%%html
<style>
/* Let output containers size to their content, capped at 1000px. */
.output_wrapper, .output {
    height:auto !important;
    max-height:1000px;  /* your desired max-height here */
}
/* Remove any box shadow from elements carrying the output_scroll class.
   The vendor-prefixed property needs its leading hyphen to be recognised. */
.output_scroll {
    box-shadow:none !important;
    -webkit-box-shadow:none !important;
}
</style>

# Optimize Policy

In [4]:
from env import Env
import env_utils as envu
from reward_sensor_gaussian_ofonly import Reward

import attitude_utils as attu

from missile import Missile
from target import Target

from missile_icgen import Missile_icgen
from target_icgen import Target_icgen

from dynamics_model_3dof import Dynamics_model_3dof as Target_dynamics_model
from dynamics_model_6dof import Dynamics_model_6dof as Missile_dynamics_model

from spiral_policy import Spiral_policy as Target_policy

from no_att_constraint import No_att_constraint
from no_w_constraint import No_w_constraint

######### RL vs PN ###########

# Master switch for this cell. It selects which `Policy` class is imported
# below, and further down it also controls `ignore_fov_vio` on the sensor,
# `terminate_on_vc` on the environment, the missile's `get_state_agent`
# override, and whether `agent.train` is called.
is_RL = True

########## RL ###########

# Training infrastructure; imported regardless of `is_RL` because the value
# function and agent are constructed on both paths later in this cell.
import rl_utils
from arch_policy_vf import Arch
import policy_nets as policy_nets
import valfunc_nets as vf_nets
from agent import Agent
from value_function import Value_function

# Both branches bind the name `Policy`, so the construction code below can
# refer to it uniformly. `PD` is only imported (and only used) on the RL path.
if is_RL:
    from policy_ppo import Policy
    from softmax_pd import Softmax_pd as PD
else:
    from zem_policy import ZEM_policy as Policy
    
######### Actuator Models #########

from actuator_model_ekv import Actuator_model_ekv as Missile_actuator_model 
from actuator_model_3dof import Actuator_model_3dof as Target_actuator_model

######## Sensor ##############

from angle_sensor import Angle_sensor
from eo_model import EO_model
import optics_utils as optu

# Attitude parameterization handed to the sensor, target and missile models.
ap = attu.Quaternion_attitude()

# Camera geometry passed to EO_model: rotation matrix, position vector and
# field of view (presumably camera-relative-to-body; verify against EO_model).
C_cb = optu.rotate_optical_axis(0.0, np.pi/2, 0.0)
r_cb = np.asarray([0, 0, 0])
fov = np.pi - np.pi/8

# Not referenced again within this cell.
offset = np.asarray([0, 0])

# Electro-optical model with a 96 x 96 pixel grid.
cm = EO_model(
    attitude_parameterization=ap,
    C_cb=C_cb,
    r_cb=r_cb,
    fov=fov,
    debug=False,
    p_x=96,
    p_y=96,
)

# Angle sensor built on the camera model; field-of-view violations are
# ignored only when running the non-RL policy.
sensor = Angle_sensor(
    cm,
    attitude_parameterization=ap,
    use_range=True,
    ignore_fov_vio=not is_RL,
    use_ideal_offset=False,
    pool_type='max',
    state_type=Angle_sensor.optflow_state,
    optflow_scale=1.0,
)


########## Target ############

target_voffset = 10
target_max_acc = 5*9.81
# Only consumed by the alternative policy constructor noted below.
target_max_acc_range = (0.0, target_max_acc)

target_dynamics_model = Target_dynamics_model(h=0.02, M=1e3)
target_actuator_model = Target_actuator_model(max_acc=target_max_acc)

# Alternative configuration kept for reference:
#   Target_policy(3, max_acc_range=target_max_acc_range, tf=80)
target_policy = Target_policy(3, max_acc=target_max_acc, qp_range=(10, 50))

target = Target(
    target_policy,
    target_actuator_model,
    target_dynamics_model,
    attitude_parameterization=ap,
)

# Initial-condition generator: identical min/max position pins the start
# position, identical v_mag bounds pin the speed, and the velocity angles
# are spread +/- target_voffset degrees around (90, 0).
target_icgen = Target_icgen(
    attitude_parameterization=ap,
    min_init_position=(0.0, 0.0, 50000.),
    max_init_position=(0.0, 0.0, 50000.),
    v_mag=(4000., 4000.),
    v_theta=(
        envu.deg2rad(90-target_voffset),
        envu.deg2rad(90+target_voffset),
    ),
    v_phi=(
        envu.deg2rad(-target_voffset),
        envu.deg2rad(target_voffset),
    ),
)

########## Missile  #############

missile_roffset = 10
missile_mass = 50
missile_max_thrust = 10*9.81*missile_mass

missile_dynamics_model = Missile_dynamics_model(h=0.02, M=1e3)

missile_actuator_model = Missile_actuator_model(
    max_thrust=missile_max_thrust,
    pulsed=True,
)

# The missile carries the sensor built earlier in this cell and is given
# the "No_*" constraint objects for rotational velocity and attitude.
missile = Missile(
    target,
    missile_actuator_model,
    missile_dynamics_model,
    sensor=sensor,
    attitude_parameterization=ap,
    w_constraint=No_w_constraint(),
    att_constraint=No_att_constraint(ap),
    align_cv=False,
    debug_cv=False,
    perturb_pn_velocity=True,
)

# The non-RL policy reads its state through the PN-specific accessor.
if not is_RL:
    missile.get_state_agent = missile.get_state_agent_PN_att

# Initial-condition generator: range drawn from [50000, 55000], angles spread
# +/- missile_roffset degrees around (90, 0), fixed speed bounds, heading
# error between 0 and 5 degrees, and zero attitude-error bounds.
missile_icgen = Missile_icgen(
    attitude_parameterization=ap,
    position_r=(50000., 55000.),
    position_theta=(
        envu.deg2rad(90-missile_roffset),
        envu.deg2rad(90+missile_roffset),
    ),
    position_phi=(
        envu.deg2rad(-missile_roffset),
        envu.deg2rad(missile_roffset),
    ),
    mag_v=(3000, 3000),
    heading_error=(envu.deg2rad(0), envu.deg2rad(5)),
    attitude_error=(0.0, 0.0),
    debug=False,
)


# Reward shaping: the fuel and field-of-view coefficients are both zero here.
reward_object = Reward(
    debug=False,
    hit_coeff=10.,
    tracking_coeff=1.,
    tracking_sigma=0.10,
    optflow_sigma=0.04,
    fuel_coeff=0.0,
    fov_coeff=-0.,
    hit_rlimit=0.5,
)

logger = rl_utils.Logger()

# Environment tying together the vehicles, their initial-condition
# generators, the logger and the reward; `terminate_on_vc` is enabled only
# for the non-RL policy.
env = Env(
    missile,
    target,
    missile_icgen,
    target_icgen,
    logger,
    precision_range=1000.,
    precision_scale=300,
    terminate_on_vc=not is_RL,
    reward_object=reward_object,
    use_offset=False,
    debug_steps=True,
    tf_limit=50.0,
    print_every=10,
    nav_period=0.04,
)

                

##########################################
# Policy, value function and agent construction, followed by training.
#
# `obs_dim` and `action_dim` are defined on BOTH branches so that any code
# reading them works regardless of `is_RL`. The non-RL branch previously
# exposed the action dimension only as `act_dim`; that name is kept as an
# alias for backward compatibility.
recurrent_steps = 200
if is_RL:
    obs_dim = 4
    action_dim = 4
    actions_per_dim = 2
    # One logit per (action dimension, discrete choice) pair.
    logit_dim = action_dim * actions_per_dim
    policy = Policy(policy_nets.GRU1(obs_dim, logit_dim, recurrent_steps=recurrent_steps),
                PD(action_dim, actions_per_dim),
                shuffle=False,
                kl_targ=0.001,epochs=20, beta=0.1, servo_kl=False, max_grad_norm=30, scale_vector_obs=True,
                init_func=rl_utils.xn_init)
else:
    # Maximum acceleration handed to the non-RL policy is thrust over mass.
    policy = Policy(ap=ap, N=5, max_acc=missile_max_thrust / missile_mass)
    obs_dim = 19
    action_dim = 4
    act_dim = action_dim  # legacy name, kept so existing references still work

arch = Arch()

# The value network is sized from `obs_dim`, so it follows whichever branch ran.
value_function = Value_function(vf_nets.GRU1(obs_dim, recurrent_steps=recurrent_steps), scale_obs=True,
                                shuffle=False, batch_size=9999999, max_grad_norm=30,
                                verbose=False)

agent = Agent(arch, policy, value_function, None, env, logger,
              policy_episodes=30, policy_steps=3000, gamma1=0.90, gamma2=0.995,
              recurrent_steps=recurrent_steps, monitor=env.rl_stats)

# Training only applies to the learned policy.
# NOTE(review): the unit of the 300000 argument (episodes vs. steps) is not
# visible from this cell - confirm against Agent.train.
if is_RL:
    agent.train(300000)

Quaternion_attitude
Euler321 Attitude
C_cb: 
[[ 6.123234e-17  0.000000e+00 -1.000000e+00]
 [ 0.000000e+00  1.000000e+00  0.000000e+00]
 [ 1.000000e+00  0.000000e+00  6.123234e-17]]
[1.000000e+00 0.000000e+00 6.123234e-17]
using max  pooling
Angle sensor:
	Output State type:  <function Angle_sensor.optflow_state at 0x1549468c8>
	Offset Init type:  <function Angle_sensor.offset_init1 at 0x154946510>
	Fixed Offset:  None
3dof dynamics model
3-dof Actuator Model:  49.050000000000004
Inertia Tensor:  [[333.33333333   0.           0.        ]
 [  0.         333.33333333   0.        ]
 [  0.           0.         333.33333333]]
Target Model: 
 - foo:  0.0
6dof dynamics model
thruster model:  [4905. 4905. 4905. 4905.]
Rotational Velocity Constraint
Attitude Constraint
Inertia Tensor:  [[6.25       0.         0.        ]
 [0.         7.29166667 0.        ]
 [0.         0.         7.29166667]]
Missile Model: 
 - foo:  0.0
Reward_terminal


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Missile env fixed (h= 0.02
[[-1.]
 [ 1.]]
Policy with vectorized sample
	xn_init: layer  Linear(in_features=4, out_features=40, bias=True)
	xn_init: layer  GRUCell(40, 56)
	xn_init: layer  Linear(in_features=56, out_features=80, bias=True)
	xn_init: layer  Linear(in_features=80, out_features=8, bias=True)
Policy: recurrent steps > 1, disabling shuffle
	Test Mode:          False
	Clip Param:         0.1
	Shuffle :           False
	Shuffle by Chunks:  False
	Max Grad Norm:      30
	Recurrent Steps:    200
	Rollout Limit:      1
	Advantage Func:     <advantage_utils.Adv_default object at 0x1569b1c50>
	Advantage Norm:     <function Adv_normalizer.apply at 0x15493fd90>
	PD:                 <softmax_pd.Softmax_pd object at 0x1569dbcf8>
	Loss Function:      <bound method Policy.calc_loss1 of <policy_ppo.Policy object at 0x1569dbd68>>
Value Funtion
	xn_init: layer  Linear(in_features=4, out_features=40, bias=True)
	xn_init: layer  GRUCell(40, 14)
	xn_init: layer  Linear(in_features=14, out_fea

ADV1:  -0.02656794188417585 0.1901165922068668 0.3600810441916723 -0.9906493818140215
ADV2:  0.049046703950705456 0.8202441748129329 1.882795846624158 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0459   0.0135   0.0753   0.0753   0.0646   0.0135
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0493   0.0194   0.0983   1.2858   0.7421   0.3504
***** Episode 217, Mean R = 137.5  Std R = 13.7  Min R = 108.1
PolicyLoss: -0.0288
Policy_Beta: 0.1
Policy_Entropy: 0.0967
Policy_KL: 0.00219
Policy_SD: 0.805
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 4.79e+04
VF_0_ExplainedVarNew: 0.744
VF_0_ExplainedVarOld: 0.731
VF_0_Loss : 0.104


ADV1:  0.0018637803544044236 0.1780055466116048 0.3603526301530533 -0.9937009811401367
ADV2:  0.023413932529689846 0.7981477655577442 1.8325149358088315 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0456   0.0007   0.0461   0.0753   0.0646   0.0135
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0843   0.0461   0.1805   1.2858   

ADV1:  0.031800635584694974 0.15577307208104127 0.4380598728894527 -0.9809881448745728
ADV2:  0.00193027766661975 0.8035104995723507 2.418345345389605 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0735   0.0044   0.0761   0.1714   0.0781   0.0374
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0597   0.0283   0.1385   1.2858   0.7421   0.3504
***** Episode 496, Mean R = 139.0  Std R = 14.2  Min R = 117.0
PolicyLoss: -0.0358
Policy_Beta: 0.1
Policy_Entropy: 0.098
Policy_KL: 0.00145
Policy_SD: 0.817
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 1.02e+05
VF_0_ExplainedVarNew: 0.845
VF_0_ExplainedVarOld: 0.826
VF_0_Loss : 0.112


ADV1:  -0.008106275733845418 0.1326938846463585 0.2607100863868762 -0.7868157074996691
ADV2:  0.03583871862450529 0.8049473230486784 1.8621667613497912 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0456   0.0205   0.0812   0.1714   0.0781   0.0374
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0949   0.0476   0.1727   1.2858   0.

ADV1:  0.0027193284919881933 0.06797861457532936 0.44940465688705444 -0.4086559412302134
ADV2:  0.0028270715792759037 0.8234970902800902 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0399   0.0090   0.0490   0.1714   0.0781   0.0374
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0166   0.0065   0.0325   1.2858   0.7421   0.3504
***** Episode 775, Mean R = 142.1  Std R = 15.8  Min R = 108.2
PolicyLoss: -0.0145
Policy_Beta: 0.1
Policy_Entropy: 0.0965
Policy_KL: 0.00183
Policy_SD: 0.808
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 1.55e+05
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.123


ADV1:  -0.005159241256348044 0.07829057243530488 0.31879252195358276 -0.6914997570739039
ADV2:  0.03495066340723961 0.8044660737398056 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0354   0.0057   0.0401   0.1714   0.0781   0.0374
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0122   0.0044   0.0240   1.2858   0.7421   0.3504
***** Ep

ADV1:  -0.002258727805865072 0.0662282059856595 0.245957234941524 -0.5965478274861999
ADV2:  0.0356497885938409 0.781759343593765 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0409   0.0050   0.0469   0.2581   0.1133   0.0591
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0140   0.0052   0.0281   1.2858   0.7421   0.3504
***** Episode 1054, Mean R = 141.0  Std R = 16.5  Min R = 112.3
PolicyLoss: -0.0373
Policy_Beta: 0.1
Policy_Entropy: 0.096
Policy_KL: 0.00189
Policy_SD: 0.802
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 2.09e+05
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.084


ADV1:  0.007007988257564957 0.058541918523705494 0.28651339229013495 -0.4100257088042992
ADV2:  0.009636191571152506 0.8507776967234484 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0270   0.0005   0.0278   0.2581   0.1133   0.0591
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0144   0.0063   0.0299   1.2858   0.7421   0.3504
***** Episode 1

ADV1:  -0.0031565316696283777 0.06029410809611413 0.20866288617689177 -0.5402315425259171
ADV2:  0.03493039136235134 0.7792005859149453 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0644   0.0457   0.2126   0.2581   0.1133   0.0591
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0098   0.0046   0.0199   1.2858   0.7421   0.3504
***** Episode 1333, Mean R = 140.0  Std R = 17.0  Min R = 112.5
PolicyLoss: -0.0342
Policy_Beta: 0.1
Policy_Entropy: 0.0973
Policy_KL: 0.00205
Policy_SD: 0.812
Policy_lr_mult: 1
Steps: 5.92e+03
TotalSteps: 2.63e+05
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.0949


ADV1:  0.0007842485765517258 0.05594855595760774 0.17284713606185964 -0.47684558435107893
ADV2:  0.023799155512424154 0.7793477614008261 2.80439964789674 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0519   0.0247   0.1113   0.2581   0.1133   0.0591
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0115   0.0043   0.0221   1.2858   0.7421  

ADV1:  0.010682593762149774 0.05468669802438508 0.2741756319893476 -0.4139341121288509
ADV2:  -0.04086838106320422 0.8213157176074852 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0245   0.0031   0.0279   0.2581   0.1133   0.0591
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0285   0.0135   0.0654   1.2858   0.7421   0.3504
***** Episode 1612, Mean R = 142.6  Std R = 11.6  Min R = 120.1
PolicyLoss: -0.000901
Policy_Beta: 0.1
Policy_Entropy: 0.0989
Policy_KL: 0.00127
Policy_SD: 0.806
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 3.17e+05
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.186


ADV1:  0.0008565468915043847 0.061112392456575154 0.23996797106221135 -0.7896125174278141
ADV2:  0.02299586338215596 0.7903873046423987 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0381   0.0032   0.0445   0.2581   0.1133   0.0591
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0129   0.0057   0.0263   1.2858   0.7421   0.3504
***** E

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0346   0.0126   0.0683   0.3690   0.1192   0.0800
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0128   0.0064   0.0265   1.2858   0.7421   0.3504
***** Episode 1891, Mean R = 143.3  Std R = 17.2  Min R = 103.9
PolicyLoss: -0.0404
Policy_Beta: 0.1
Policy_Entropy: 0.101
Policy_KL: 0.000703
Policy_SD: 0.808
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 3.71e+05
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.167


ADV1:  0.00039717994720941274 0.04067463880272795 0.24173910915851593 -0.32784154709222196
ADV2:  0.014907871368223335 0.8111204898430433 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0606   0.0339   0.1489   0.3690   0.1192   0.0800
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0089   0.0038   0.0184   1.2858   0.7421   0.3504
***** Episode 1922, Mean R = 145.5  Std R = 13.0  Min R = 107.1
PolicyLoss: -0.0217
Policy_Beta: 0.1
Policy_Entropy: 0.102
Policy_KL: 0.00173
Policy_S

theta_cv |    0.03 |    0.03 |    0.00 |    0.32
steps    |  193.59 |    7.43 |  179.00 |  226.00
***** Episode 2170, Mean R = 148.7  Std R = 13.0  Min R = 129.3
PolicyLoss: -0.0135
Policy_Beta: 0.1
Policy_Entropy: 0.109
Policy_KL: 0.00351
Policy_SD: 0.822
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 4.25e+05
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.218


ADV1:  -0.004888626660163814 0.040225744031844535 0.13768449404478167 -0.33243434472323363
ADV2:  0.03679384072868515 0.8408355107902242 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1929   0.1569   0.6550   1.2934   0.4520   0.3232
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0267   0.0151   0.0519   1.2858   0.7421   0.3504
***** Episode 2201, Mean R = 141.2  Std R = 15.2  Min R = 114.7
PolicyLoss: -0.0244
Policy_Beta: 0.1
Policy_Entropy: 0.115
Policy_KL: 0.00096
Policy_SD: 0.823
Policy_lr_mult: 1
Steps: 5.92e+03
TotalSteps: 4.31e+05
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld

***** Episode 2480, Mean R = 160.8  Std R = 15.0  Min R = 133.1
PolicyLoss: -0.0238
Policy_Beta: 0.1
Policy_Entropy: 0.113
Policy_KL: 0.000914
Policy_SD: 0.827
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 4.85e+05
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.146


ADV1:  -0.0019953638442733877 0.04511428521478398 0.22128728294629618 -0.45373313924442504
ADV2:  0.03904501379874524 0.7173368623699485 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2054   0.1171   0.5363   1.2934   0.4520   0.3232
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2173   0.1710   0.7623   1.2858   0.7421   0.3504
***** Episode 2511, Mean R = 161.7  Std R = 16.7  Min R = 131.3
PolicyLoss: -0.0389
Policy_Beta: 0.1
Policy_Entropy: 0.116
Policy_KL: 0.00128
Policy_SD: 0.826
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 4.91e+05
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.166


ADV1:  0.005191806098566201 0.05543352896358973 0.348909148529141 -0

optflow_error |  0.0988 |  0.7386 |  0.0000 | 32.2522
pixel_icoords |   -0.24    0.27 |    6.67    6.36 |  -14.17  -16.21 |   15.29   15.19
theta_cv |    0.04 |    0.06 |    0.00 |    0.42
steps    |  192.96 |    7.10 |  180.00 |  213.00
***** Episode 2790, Mean R = 165.7  Std R = 13.1  Min R = 135.2
PolicyLoss: -0.0291
Policy_Beta: 0.1
Policy_Entropy: 0.128
Policy_KL: 0.00092
Policy_SD: 0.829
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 5.45e+05
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.184


ADV1:  0.008131137854423902 0.04959961079103971 0.3197007179260254 -0.5369361215674056
ADV2:  -0.011664378786287762 0.7518894281895068 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3428   0.2068   0.8878   1.2934   0.4520   0.3232
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0379   0.0251   0.1178   1.2858   0.7421   0.3504
***** Episode 2821, Mean R = 167.8  Std R = 13.1  Min R = 124.0
PolicyLoss: -0.029
Policy_Beta: 0.1
Policy_Entropy: 0.12
Po

***** Episode 3100, Mean R = 167.7  Std R = 10.9  Min R = 139.9
PolicyLoss: -0.0164
Policy_Beta: 0.1
Policy_Entropy: 0.137
Policy_KL: 0.00122
Policy_SD: 0.833
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 6.04e+05
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.111


ADV1:  0.0025287301004095447 0.03281015655789327 0.33730244345639304 -0.19831364097270376
ADV2:  -0.0302232191478197 0.7625925055618811 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6716   0.4013   1.9581   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0138   0.0074   0.0288   1.2858   0.7421   0.3504
***** Episode 3131, Mean R = 167.5  Std R = 10.9  Min R = 131.7
PolicyLoss: 0.0155
Policy_Beta: 0.1
Policy_Entropy: 0.129
Policy_KL: 0.00193
Policy_SD: 0.83
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 6.11e+05
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.178


ADV1:  -0.0019063004877664562 0.043087698342047676 0.20458604061970176 

optflow_error |  0.1008 |  0.8541 |  0.0000 | 36.4594
pixel_icoords |   -0.74    0.42 |    6.58    6.90 |  -17.28  -16.55 |   15.92   16.85
theta_cv |    0.06 |    0.07 |    0.00 |    0.46
steps    |  193.65 |    7.49 |  179.00 |  219.00
***** Episode 3410, Mean R = 167.3  Std R = 8.8  Min R = 145.9
PolicyLoss: -0.0204
Policy_Beta: 0.1
Policy_Entropy: 0.138
Policy_KL: 0.00177
Policy_SD: 0.838
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 6.65e+05
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.12


ADV1:  -0.00967861638243401 0.04651109569827052 0.17045138401851612 -0.8157140612602234
ADV2:  0.05288589194760474 0.8018817272409304 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1370   0.1112   0.4402   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0539   0.0339   0.1421   1.2858   0.7421   0.3504
***** Episode 3441, Mean R = 165.9  Std R = 11.4  Min R = 132.0
PolicyLoss: -0.0304
Policy_Beta: 0.1
Policy_Entropy: 0.139
Po

***** Episode 3720, Mean R = 164.0  Std R = 14.9  Min R = 130.5
PolicyLoss: -0.0197
Policy_Beta: 0.1
Policy_Entropy: 0.147
Policy_KL: 0.000787
Policy_SD: 0.836
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 7.24e+05
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.145


Dynamics: Max Disturbance (m/s^2):  [1.58879116e-12 3.03340678e-13 2.13668871e-12] 2.6798715547466783e-12
Dynamics: Max Disturbance (m/s^2):  [1.43894322e-12 2.46906928e-13 2.68884927e-12] 3.0596455668229396e-12
ADV1:  -0.0004430263445069816 0.03666752390155442 0.316566425130165 -0.38328842488307524
ADV2:  0.006841098065762992 0.7455068773527171 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5291   0.2914   1.2872   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0138   0.0054   0.0271   1.2858   0.7421   0.3504
***** Episode 3751, Mean R = 166.5  Std R = 10.8  Min R = 136.2
PolicyLoss: -0.00798
Policy_Beta: 0.1
Policy_Entropy: 0.139
Policy_KL: 0.00136
Policy

tracking_error |  0.0224 |  0.0590 |  0.0000 |  1.2971
optflow_error |  0.0995 |  0.8862 |  0.0000 | 37.6487
pixel_icoords |   -0.27    0.39 |    6.93    6.63 |  -16.59  -14.31 |   16.30   14.98
theta_cv |    0.06 |    0.07 |    0.00 |    0.42
steps    |  193.97 |    7.40 |  179.00 |  220.00
***** Episode 4030, Mean R = 173.3  Std R = 10.4  Min R = 156.7
PolicyLoss: 0.0133
Policy_Beta: 0.1
Policy_Entropy: 0.13
Policy_KL: 0.00194
Policy_SD: 0.826
Policy_lr_mult: 1
Steps: 6.08e+03
TotalSteps: 7.85e+05
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.242


ADV1:  -0.007537679928081674 0.0378516838416102 0.1331400298206582 -0.3715420506842301
ADV2:  0.056150909369818126 0.7717412634596187 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2233   0.1069   0.4217   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0457   0.0245   0.1002   1.2858   0.7421   0.3504
***** Episode 4061, Mean R = 167.8  Std R = 12.7  Min R = 141.7
PolicyL

optflow_error |  0.0924 |  0.8500 |  0.0000 | 46.1508
pixel_icoords |   -0.29    0.80 |    6.82    6.58 |  -15.89  -14.06 |   15.14   16.48
theta_cv |    0.05 |    0.07 |    0.00 |    0.38
steps    |  193.64 |    6.69 |  179.00 |  216.00
***** Episode 4340, Mean R = 170.2  Std R = 9.5  Min R = 149.9
PolicyLoss: 0.017
Policy_Beta: 0.1
Policy_Entropy: 0.131
Policy_KL: 0.000804
Policy_SD: 0.834
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 8.45e+05
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.144


ADV1:  -8.73331062823292e-05 0.03438934547203742 0.2594285449592819 -0.4524186723010011
ADV2:  0.010619186721816996 0.7532443681185812 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2302   0.1341   0.5402   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0129   0.0059   0.0269   1.2858   0.7421   0.3504
***** Episode 4371, Mean R = 169.3  Std R = 12.0  Min R = 129.0
PolicyLoss: -0.0131
Policy_Beta: 0.1
Policy_Entropy: 0.138
Poli

theta_cv |    0.06 |    0.07 |    0.00 |    0.40
steps    |  193.04 |    6.95 |  179.00 |  218.00
***** Episode 4650, Mean R = 172.2  Std R = 8.8  Min R = 154.2
PolicyLoss: -0.027
Policy_Beta: 0.1
Policy_Entropy: 0.139
Policy_KL: 0.000844
Policy_SD: 0.84
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 9.04e+05
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.166


ADV1:  0.0019339809706761279 0.03414242027148744 0.307491800192111 -0.33797327817661516
ADV2:  -0.017020669532337187 0.7379982004960493 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2460   0.1093   0.4527   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0282   0.0152   0.0556   1.2858   0.7421   0.3504
***** Episode 4681, Mean R = 171.1  Std R = 8.8  Min R = 150.0
PolicyLoss: 0.00454
Policy_Beta: 0.1
Policy_Entropy: 0.132
Policy_KL: 0.00124
Policy_SD: 0.827
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 9.1e+05
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.

tracking_error |  0.0205 |  0.0547 |  0.0000 |  1.2614
optflow_error |  0.0974 |  0.8913 |  0.0000 | 28.4869
pixel_icoords |    0.66   -0.10 |    6.71    7.10 |  -13.65  -15.89 |   19.56   14.63
theta_cv |    0.06 |    0.07 |    0.00 |    0.43
steps    |  193.26 |    7.38 |  180.00 |  218.00
***** Episode 4960, Mean R = 172.0  Std R = 6.9  Min R = 147.8
PolicyLoss: -0.0287
Policy_Beta: 0.1
Policy_Entropy: 0.133
Policy_KL: 0.00117
Policy_SD: 0.825
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 9.64e+05
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.126


ADV1:  -0.0031125769505995506 0.03167887854129333 0.23093749800758723 -0.2880158200979856
ADV2:  0.0311530869857272 0.7470378239540383 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2533   0.1418   0.5666   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0162   0.0082   0.0334   1.2858   0.7421   0.3504
***** Episode 4991, Mean R = 169.1  Std R = 8.0  Min R = 141.5
Polic

***** Episode 5270, Mean R = 168.6  Std R = 9.4  Min R = 145.1
PolicyLoss: -0.0292
Policy_Beta: 0.1
Policy_Entropy: 0.143
Policy_KL: 0.00151
Policy_SD: 0.851
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 1.02e+06
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.0684


ADV1:  0.006695251254710354 0.035713419862093934 0.30087392075963737 -0.15621857276078566
ADV2:  -0.06770060772778112 0.7491310393367387 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1365   0.0542   0.2204   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0659   0.0348   0.1362   1.2858   0.7421   0.3504
***** Episode 5301, Mean R = 170.9  Std R = 10.0  Min R = 142.7
PolicyLoss: 0.0288
Policy_Beta: 0.1
Policy_Entropy: 0.14
Policy_KL: 0.00102
Policy_SD: 0.832
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 1.03e+06
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.205


ADV1:  0.002768232790199071 0.03305110928143854 0.22367730575149658 -0.2

pixel_icoords |   -0.54    0.24 |    7.05    7.02 |  -16.66  -14.51 |   15.41   15.29
theta_cv |    0.06 |    0.07 |    0.00 |    0.40
steps    |  193.82 |    7.12 |  180.00 |  216.00
***** Episode 5580, Mean R = 169.3  Std R = 11.3  Min R = 126.5
PolicyLoss: -0.0264
Policy_Beta: 0.1
Policy_Entropy: 0.142
Policy_KL: 0.000681
Policy_SD: 0.836
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 1.08e+06
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.119


ADV1:  -0.0027581709938429445 0.027988823755079256 0.5225562453269958 -0.2592704178513891
ADV2:  0.029415594380319147 0.7720362299681145 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1800   0.0570   0.3156   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0142   0.0073   0.0303   1.2858   0.7421   0.3504
***** Episode 5611, Mean R = 168.5  Std R = 11.3  Min R = 141.4
PolicyLoss: -0.0148
Policy_Beta: 0.1
Policy_Entropy: 0.142
Policy_KL: 0.00118
Policy_SD: 0.838
Policy_lr_mult: 1


tracking_error |  0.0211 |  0.0543 |  0.0000 |  1.3274
optflow_error |  0.0960 |  0.8869 |  0.0000 | 52.6933
pixel_icoords |    0.34    0.64 |    6.83    6.61 |  -14.77  -13.87 |   16.51   16.11
theta_cv |    0.06 |    0.07 |    0.00 |    0.40
steps    |  193.44 |    6.86 |  180.00 |  222.00
***** Episode 5890, Mean R = 170.2  Std R = 9.7  Min R = 148.8
PolicyLoss: -0.0271
Policy_Beta: 0.1
Policy_Entropy: 0.141
Policy_KL: 0.000924
Policy_SD: 0.833
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 1.14e+06
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.188


Dynamics: Max Disturbance (m/s^2):  [1.59155926e-12 3.03340678e-13 2.13668871e-12] 2.6815135808965905e-12
Dynamics: Max Disturbance (m/s^2):  [1.43894322e-12 2.46906928e-13 2.68884927e-12] 3.0596455668229396e-12
ADV1:  0.0016689457242924312 0.03606097767789981 0.21541222753278455 -0.34592118859291077
ADV2:  -0.0019311465998752039 0.7666066816280181 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2976 

***** Episode 6200, Mean R = 169.0  Std R = 9.9  Min R = 147.0
PolicyLoss: -0.0195
Policy_Beta: 0.1
Policy_Entropy: 0.141
Policy_KL: 0.00198
Policy_SD: 0.842
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 1.2e+06
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.102


ADV1:  -4.225548326182134e-05 0.02834059889210273 0.17797658360079394 -0.21742083739315438
ADV2:  0.0007231572886171107 0.7883688722631154 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2828   0.1518   0.6339   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0120   0.0042   0.0210   1.2858   0.7421   0.3504
***** Episode 6231, Mean R = 169.6  Std R = 9.5  Min R = 145.9
PolicyLoss: -0.00344
Policy_Beta: 0.1
Policy_Entropy: 0.14
Policy_KL: 0.00134
Policy_SD: 0.841
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 1.21e+06
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.122


ADV1:  -0.0005894817426397422 0.03177123708778772 0.256792874525563 -0

a_f      |   -0.01   -0.07 |    0.27    1.78 |   -0.66   -3.10 |    0.58    3.13
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.00 |    0.00 |    0.00 |    0.00
tracking_rewards |  169.90 |    9.17 |  135.01 |  201.18
tracking_error |  0.0199 |  0.0514 |  0.0000 |  1.2597
optflow_error |  0.0935 |  0.9160 |  0.0000 | 34.8709
pixel_icoords |   -0.19    0.46 |    6.58    6.20 |  -17.98  -15.20 |   15.15   16.51
theta_cv |    0.05 |    0.06 |    0.00 |    0.38
steps    |  192.78 |    6.59 |  180.00 |  212.00
***** Episode 6510, Mean R = 171.7  Std R = 7.3  Min R = 156.9
PolicyLoss: -0.00506
Policy_Beta: 0.1
Policy_Entropy: 0.14
Policy_KL: 0.000797
Policy_SD: 0.834
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 1.26e+06
VF_0_ExplainedVarNew: 0.986
VF_0_Expla

***** Episode 6820, Mean R = 174.5  Std R = 10.7  Min R = 135.3
PolicyLoss: 0.0117
Policy_Beta: 0.1
Policy_Entropy: 0.129
Policy_KL: 0.00127
Policy_SD: 0.83
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 1.32e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.167


Dynamics: Max Disturbance (m/s^2):  [1.59155926e-12 3.03340678e-13 2.13668871e-12] 2.6815135808965905e-12
Dynamics: Max Disturbance (m/s^2):  [1.43894322e-12 2.46906928e-13 2.68884927e-12] 3.0596455668229396e-12
ADV1:  0.0012498001901516038 0.041441604507470066 0.28537965321109626 -0.312695245338142
ADV2:  -0.005913542288180385 0.7478374233203726 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2849   0.1436   0.5425   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0180   0.0073   0.0313   1.2858   0.7421   0.3504
***** Episode 6851, Mean R = 169.1  Std R = 10.4  Min R = 136.5
PolicyLoss: -0.00235
Policy_Beta: 0.1
Policy_Entropy: 0.149
Policy_KL: 0.00111
Polic

tracking_error |  0.0196 |  0.0483 |  0.0000 |  1.1943
optflow_error |  0.0869 |  0.9013 |  0.0000 | 40.1790
pixel_icoords |   -0.24   -0.03 |    7.11    7.15 |  -16.26  -17.91 |   17.09   16.58
theta_cv |    0.05 |    0.06 |    0.00 |    0.39
steps    |  194.21 |    7.50 |  180.00 |  225.00
***** Episode 7130, Mean R = 171.3  Std R = 10.3  Min R = 150.3
PolicyLoss: -0.0273
Policy_Beta: 0.1
Policy_Entropy: 0.144
Policy_KL: 0.00216
Policy_SD: 0.84
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 1.38e+06
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.0997


ADV1:  0.004837029005195844 0.03367318571964711 0.35027144342784555 -0.2326969384585474
ADV2:  -0.03900607721034092 0.7692234884640117 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2214   0.1372   0.6264   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0390   0.0239   0.1016   1.2858   0.7421   0.3504
***** Episode 7161, Mean R = 172.4  Std R = 9.4  Min R = 152.4
Poli

***** Episode 7440, Mean R = 174.8  Std R = 9.4  Min R = 152.0
PolicyLoss: -0.0108
Policy_Beta: 0.1
Policy_Entropy: 0.143
Policy_KL: 0.00108
Policy_SD: 0.838
Policy_lr_mult: 1
Steps: 6.06e+03
TotalSteps: 1.44e+06
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.137


ADV1:  -0.004110353648689318 0.03817401642260254 0.32466951917438247 -0.3562100453372482
ADV2:  0.02671314020129134 0.7056090068955909 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4780   0.1878   0.9804   2.4515   0.6866   0.4874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0457   0.0209   0.0833   1.2858   0.7421   0.3504
***** Episode 7471, Mean R = 173.1  Std R = 9.0  Min R = 154.2
PolicyLoss: -0.0101
Policy_Beta: 0.1
Policy_Entropy: 0.142
Policy_KL: 0.00179
Policy_SD: 0.836
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 1.45e+06
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.173


ADV1:  -0.002059977108691183 0.03269018681460848 0.25862931048110505 -0.

optflow_error |  0.0768 |  0.9039 |  0.0000 | 41.0320
pixel_icoords |   -0.48   -0.29 |    6.67    6.67 |  -16.55  -17.86 |   17.43   14.46
theta_cv |    0.05 |    0.06 |    0.00 |    0.38
steps    |  193.48 |    6.98 |  179.00 |  217.00
***** Episode 7750, Mean R = 175.3  Std R = 8.6  Min R = 149.0
PolicyLoss: -0.0235
Policy_Beta: 0.1
Policy_Entropy: 0.145
Policy_KL: 0.00122
Policy_SD: 0.85
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 1.5e+06
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.0654


Dynamics: Max Disturbance (m/s^2):  [1.59155926e-12 3.03340678e-13 2.13668871e-12] 2.6815135808965905e-12
Dynamics: Max Disturbance (m/s^2):  [1.43894322e-12 2.46906928e-13 2.68884927e-12] 3.0596455668229396e-12
ADV1:  0.0016211223800762446 0.03587068666854719 0.37642530288274867 -0.35054612080612535
ADV2:  -0.011574431480037188 0.7004606834889915 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8205   0.4907   2.1100   2.5686   0.8205   0.6131
ValFun  Grad

tracking_error |  0.0157 |  0.0381 |  0.0000 |  1.2397
optflow_error |  0.0738 |  0.9131 |  0.0000 | 41.5513
pixel_icoords |    0.37    0.47 |    6.97    6.73 |  -15.99  -15.31 |   17.72   16.30
theta_cv |    0.05 |    0.06 |    0.00 |    0.33
steps    |  193.55 |    7.49 |  180.00 |  220.00
***** Episode 8060, Mean R = 177.3  Std R = 8.9  Min R = 162.9
PolicyLoss: -0.0334
Policy_Beta: 0.1
Policy_Entropy: 0.132
Policy_KL: 0.00179
Policy_SD: 0.84
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 1.56e+06
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.221


ADV1:  -0.0005802395731571503 0.035312799089385366 0.23815315105884705 -0.4787486840544488
ADV2:  0.020119489341677965 0.7102185620936825 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3905   0.2349   0.8821   6.8917   2.7511   1.4815
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0059   0.0022   0.0110   1.2858   0.7421   0.3504
***** Episode 8091, Mean R = 177.0  Std R = 8.0  Min R = 161.3
Pol

tracking_error |  0.0148 |  0.0355 |  0.0000 |  1.1137
optflow_error |  0.0659 |  0.8452 |  0.0000 | 40.7341
pixel_icoords |   -0.73   -0.12 |    7.26    6.47 |  -15.44  -15.63 |   18.55   15.77
theta_cv |    0.05 |    0.06 |    0.00 |    0.36
steps    |  193.83 |    7.36 |  180.00 |  218.00
***** Episode 8370, Mean R = 180.4  Std R = 12.2  Min R = 143.9
PolicyLoss: -0.013
Policy_Beta: 0.1
Policy_Entropy: 0.136
Policy_KL: 0.00104
Policy_SD: 0.834
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 1.62e+06
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.267


ADV1:  -0.008072782246802641 0.04898097727274134 0.21596173832248833 -0.5872225537445901
ADV2:  0.052649951861052195 0.7375142174109536 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1945   0.5293   2.4668   6.8917   2.7511   1.4815
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0930   0.0426   0.2001   1.2858   0.7421   0.3504
***** Episode 8401, Mean R = 175.1  Std R = 9.8  Min R = 145.3
Poli

theta_cv |    0.05 |    0.06 |    0.00 |    0.33
steps    |  193.77 |    6.54 |  181.00 |  212.00
***** Episode 8680, Mean R = 180.9  Std R = 7.9  Min R = 166.9
PolicyLoss: 0.00267
Policy_Beta: 0.1
Policy_Entropy: 0.131
Policy_KL: 0.00114
Policy_SD: 0.833
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 1.68e+06
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.192


Dynamics: Max Disturbance (m/s^2):  [1.59155926e-12 3.03340678e-13 2.13668871e-12] 2.6815135808965905e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.46906928e-13 2.68884927e-12] 3.0627151080043454e-12
ADV1:  -0.012663697752497198 0.044910658285119705 0.23661382775728246 -0.7414703755337083
ADV2:  0.07661299487530554 0.7201015901583988 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.8915   0.7772   3.4450   7.4597   2.8188   1.8691
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0711   0.0297   0.1440   1.2858   0.7421   0.3504
***** Episode 8711, Mean R = 179.0  Std R = 7.8  Mi

tracking_error |  0.0136 |  0.0284 |  0.0000 |  1.1262
optflow_error |  0.0516 |  0.7227 |  0.0000 | 41.4796
pixel_icoords |   -0.03    0.40 |    7.18    6.37 |  -16.12  -14.57 |   15.67   16.14
theta_cv |    0.05 |    0.05 |    0.00 |    0.33
steps    |  193.76 |    6.61 |  181.00 |  218.00
***** Episode 8990, Mean R = 177.0  Std R = 8.4  Min R = 158.7
PolicyLoss: -0.0111
Policy_Beta: 0.1
Policy_Entropy: 0.148
Policy_KL: 0.000794
Policy_SD: 0.848
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 1.74e+06
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.148


ADV1:  0.009559100376540798 0.04171798464590487 0.397409637359752 -0.35955183846339445
ADV2:  -0.05003499389507225 0.7513839950945089 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8501   0.4503   1.8633   7.4812   2.8188   1.8907
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0518   0.0250   0.1004   1.2858   0.7421   0.3504
***** Episode 9021, Mean R = 177.0  Std R = 10.0  Min R = 145.0
Poli

***** Episode 9300, Mean R = 180.3  Std R = 8.8  Min R = 157.2
PolicyLoss: -0.0461
Policy_Beta: 0.1
Policy_Entropy: 0.142
Policy_KL: 0.000951
Policy_SD: 0.842
Policy_lr_mult: 1
Steps: 5.92e+03
TotalSteps: 1.8e+06
VF_0_ExplainedVarNew: 0.927
VF_0_ExplainedVarOld: 0.932
VF_0_Loss : 0.158


ADV1:  -0.001383342008766412 0.04697142352852146 0.32777504736822805 -0.5141992163263986
ADV2:  0.016376422931280065 0.7289554453542961 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4363   0.9118   4.4484   7.4812   2.8388   1.8907
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0716   0.0428   0.1699   1.2858   0.7421   0.3504
***** Episode 9331, Mean R = 184.8  Std R = 7.4  Min R = 172.4
PolicyLoss: -0.0151
Policy_Beta: 0.1
Policy_Entropy: 0.13
Policy_KL: 0.00111
Policy_SD: 0.829
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 1.81e+06
VF_0_ExplainedVarNew: 0.939
VF_0_ExplainedVarOld: 0.945
VF_0_Loss : 0.191


Dynamics: Max Disturbance (m/s^2):  [1.59155926e-12 3.03340678e-13 2.136688

tracking_error |  0.0097 |  0.0173 |  0.0000 |  1.2349
optflow_error |  0.0286 |  0.6461 |  0.0000 | 53.1733
pixel_icoords |    0.31   -0.74 |    6.68    6.59 |  -14.66  -17.90 |   16.01   16.92
theta_cv |    0.05 |    0.05 |    0.00 |    0.30
steps    |  193.62 |    7.14 |  179.00 |  218.00
***** Episode 9610, Mean R = 183.7  Std R = 5.3  Min R = 173.9
PolicyLoss: 0.00322
Policy_Beta: 0.1
Policy_Entropy: 0.141
Policy_KL: 0.00115
Policy_SD: 0.844
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 1.86e+06
VF_0_ExplainedVarNew: 0.949
VF_0_ExplainedVarOld: 0.937
VF_0_Loss : 0.146


Dynamics: Max Disturbance (m/s^2):  [1.59155926e-12 3.03340678e-13 2.13668871e-12] 2.6815135808965905e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.48934556e-13 2.68884927e-12] 3.0628792361540385e-12
ADV1:  -0.004912528123357741 0.04932885045279877 0.4039977356546095 -0.6711564503669216
ADV2:  0.041228905633224726 0.6973796654857193 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.1125   0.

pixel_icoords |    0.18    0.33 |    6.77    6.63 |  -19.07  -15.10 |   15.88   15.76
theta_cv |    0.05 |    0.04 |    0.00 |    0.30
steps    |  193.35 |    7.10 |  180.00 |  215.00
***** Episode 9920, Mean R = 186.1  Std R = 7.2  Min R = 175.7
PolicyLoss: 0.00459
Policy_Beta: 0.1
Policy_Entropy: 0.134
Policy_KL: 0.00121
Policy_SD: 0.836
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 1.92e+06
VF_0_ExplainedVarNew: 0.948
VF_0_ExplainedVarOld: 0.944
VF_0_Loss : 0.149


ADV1:  0.007045175289070769 0.051606466132860865 0.4134587021714837 -0.4499983260489651
ADV2:  -0.02015200989174272 0.7369384961536439 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.2087   1.2646   6.1438   7.8863   2.8388   1.8907
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0648   0.0366   0.1258   1.2858   0.7421   0.3504
***** Episode 9951, Mean R = 185.1  Std R = 7.7  Min R = 168.4
PolicyLoss: 0.000335
Policy_Beta: 0.1
Policy_Entropy: 0.138
Policy_KL: 0.00108
Policy_SD: 0.839
Policy_lr_mult: 1


theta_cv |    0.05 |    0.04 |    0.00 |    0.28
steps    |  193.86 |    7.76 |  179.00 |  223.00
***** Episode 10230, Mean R = 186.4  Std R = 7.1  Min R = 173.6
PolicyLoss: -0.0219
Policy_Beta: 0.1
Policy_Entropy: 0.132
Policy_KL: 0.0011
Policy_SD: 0.837
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 1.98e+06
VF_0_ExplainedVarNew: 0.935
VF_0_ExplainedVarOld: 0.939
VF_0_Loss : 0.169


ADV1:  0.008923975985908099 0.05220857720801926 0.3818221139692054 -0.4249743738297632
ADV2:  -0.03480685211506318 0.7291697142110733 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0843   0.4762   2.1968   9.5102   3.3325   1.9925
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1256   0.0738   0.3230   1.2858   0.7421   0.3504
***** Episode 10261, Mean R = 186.7  Std R = 9.5  Min R = 164.6
PolicyLoss: 0.00255
Policy_Beta: 0.1
Policy_Entropy: 0.138
Policy_KL: 0.000848
Policy_SD: 0.842
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 1.99e+06
VF_0_ExplainedVarNew: 0.938
VF_0_ExplainedVarOld:

optflow_error |  0.0237 |  0.6180 |  0.0000 | 49.8816
pixel_icoords |    0.05    0.00 |    6.90    6.76 |  -15.94  -14.77 |   15.98   16.15
theta_cv |    0.05 |    0.04 |    0.00 |    0.30
steps    |  193.61 |    7.56 |  180.00 |  224.00
***** Episode 10540, Mean R = 186.1  Std R = 6.5  Min R = 174.1
PolicyLoss: 0.0129
Policy_Beta: 0.1
Policy_Entropy: 0.135
Policy_KL: 0.00121
Policy_SD: 0.832
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 2.04e+06
VF_0_ExplainedVarNew: 0.948
VF_0_ExplainedVarOld: 0.941
VF_0_Loss : 0.119


ADV1:  0.0014570500994222984 0.04168578707163814 0.24888841964573855 -0.41042357703299753
ADV2:  0.0082967200446194 0.715660198099313 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1425   0.7062   2.4918  14.8186   5.4349   4.5122
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0618   0.0354   0.1410   1.2858   0.7421   0.3504
***** Episode 10571, Mean R = 185.2  Std R = 5.9  Min R = 173.9
PolicyLoss: -0.0167
Policy_Beta: 0.1
Policy_Entropy: 0.133
P

theta_cv |    0.05 |    0.04 |    0.00 |    0.30
steps    |  193.26 |    6.85 |  180.00 |  218.00
***** Episode 10850, Mean R = 187.7  Std R = 8.2  Min R = 160.8
PolicyLoss: -0.00268
Policy_Beta: 0.1
Policy_Entropy: 0.135
Policy_KL: 0.00143
Policy_SD: 0.833
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 2.1e+06
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.206


ADV1:  -0.006023128886626277 0.04784633297010495 0.28864149762021485 -0.5439184873131585
ADV2:  0.04780360699527561 0.7052330402302757 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9172   0.8866   4.6250  14.8186   5.4349   4.5122
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1499   0.0779   0.2903   1.2858   0.7421   0.3504
***** Episode 10881, Mean R = 185.4  Std R = 5.7  Min R = 175.9
PolicyLoss: -0.0371
Policy_Beta: 0.1
Policy_Entropy: 0.134
Policy_KL: 0.00103
Policy_SD: 0.835
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 2.11e+06
VF_0_ExplainedVarNew: 0.94
VF_0_ExplainedVarOld:

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.02    0.07 |    0.27    1.78 |   -0.55   -3.12 |    0.58    3.12
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    0.03 |    0.57 |    0.00 |   10.00
tracking_rewards |  186.69 |    7.63 |  168.49 |  215.73
tracking_error |  0.0090 |  0.0118 |  0.0000 |  0.9502
optflow_error |  0.0176 |  0.4203 |  0.0000 | 43.7887
pixel_icoords |    0.39    0.09 |    6.64    6.94 |  -14.53  -17.98 |   16.07   15.82
theta_cv |    0.05 |    0.04 |    0.00 |    0.26
steps    |  193.33 |    7.65 |  179.00 |  225.00
***** Episode 11160, Mean R = 186.2  Std R = 6.9  Min R = 173.5
PolicyLoss: -0.0354
Policy_Beta: 0.1
Policy_Entropy: 0.13
Policy_KL: 0.0013

pixel_icoords |    0.38    0.00 |    7.10    7.04 |  -15.95  -16.33 |   16.15   16.64
theta_cv |    0.05 |    0.04 |    0.00 |    0.24
steps    |  194.04 |    7.33 |  180.00 |  219.00
***** Episode 11470, Mean R = 188.6  Std R = 7.0  Min R = 176.6
PolicyLoss: -0.00976
Policy_Beta: 0.1
Policy_Entropy: 0.13
Policy_KL: 0.000734
Policy_SD: 0.835
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 2.22e+06
VF_0_ExplainedVarNew: 0.939
VF_0_ExplainedVarOld: 0.935
VF_0_Loss : 0.161


ADV1:  -0.003185682770124503 0.052716043213390726 0.3605068946175357 -0.6298972593551413
ADV2:  0.022263950204752796 0.706170842092805 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3782   0.6861   2.8523  14.8186   5.4349   4.5122
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0360   0.0154   0.0763   1.2858   0.7421   0.3504
***** Episode 11501, Mean R = 187.2  Std R = 9.2  Min R = 175.8
PolicyLoss: -0.0181
Policy_Beta: 0.1
Policy_Entropy: 0.128
Policy_KL: 0.000832
Policy_SD: 0.833
Policy_lr_mult:

tracking_error |  0.0085 |  0.0105 |  0.0000 |  0.9106
optflow_error |  0.0132 |  0.3609 |  0.0000 | 40.6662
pixel_icoords |   -0.52   -0.18 |    6.51    6.54 |  -15.22  -15.64 |   15.15   15.06
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  193.19 |    7.51 |  180.00 |  227.00
***** Episode 11780, Mean R = 186.5  Std R = 6.1  Min R = 170.6
PolicyLoss: -0.0349
Policy_Beta: 0.1
Policy_Entropy: 0.134
Policy_KL: 0.00093
Policy_SD: 0.834
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 2.28e+06
VF_0_ExplainedVarNew: 0.948
VF_0_ExplainedVarOld: 0.941
VF_0_Loss : 0.093


ADV1:  -0.0010389018947061872 0.04271725913634034 0.2800449844284185 -0.5317889555044986
ADV2:  0.018297146481362863 0.701905718404891 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3215   0.9866   4.6567  14.8186   5.4349   4.5122
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0207   0.0096   0.0455   1.2858   0.7421   0.3504
***** Episode 11811, Mean R = 187.0  Std R = 6.6  Min R = 174.9
Pol

optflow_error |  0.0119 |  0.3349 |  0.0000 | 43.2747
pixel_icoords |    0.22   -0.45 |    6.69    6.47 |  -17.06  -15.65 |   17.59   15.33
theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  192.97 |    6.99 |  179.00 |  213.00
***** Episode 12090, Mean R = 185.7  Std R = 5.8  Min R = 175.2
PolicyLoss: -0.0303
Policy_Beta: 0.1
Policy_Entropy: 0.134
Policy_KL: 0.00112
Policy_SD: 0.83
Policy_lr_mult: 1
Steps: 5.93e+03
TotalSteps: 2.34e+06
VF_0_ExplainedVarNew: 0.952
VF_0_ExplainedVarOld: 0.948
VF_0_Loss : 0.0751


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.48934556e-13 2.68884927e-12] 3.0628792361540385e-12
ADV1:  -0.0033631842966587557 0.05100685975212175 0.44985200667540104 -0.5834644811279583
ADV2:  0.020000936879955463 0.6434556791197832 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6367   1.1615   5.0834  14.8186   5.4349   4.5122
ValFun  Gra

theta_cv |    0.05 |    0.04 |    0.00 |    0.30
steps    |  193.55 |    7.15 |  180.00 |  217.00
***** Episode 12400, Mean R = 189.7  Std R = 7.2  Min R = 177.3
PolicyLoss: 0.00146
Policy_Beta: 0.1
Policy_Entropy: 0.134
Policy_KL: 0.00116
Policy_SD: 0.834
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 2.4e+06
VF_0_ExplainedVarNew: 0.938
VF_0_ExplainedVarOld: 0.936
VF_0_Loss : 0.202


ADV1:  -0.002854391049240029 0.04563747608386004 0.3728482010556038 -0.4701918485271198
ADV2:  0.019583871113148178 0.7164642312412065 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.1945   0.9593   5.9211  14.8186   5.4349   4.5122
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0176   0.0091   0.0345   1.2858   0.7421   0.3504
***** Episode 12431, Mean R = 189.0  Std R = 7.7  Min R = 177.2
PolicyLoss: -0.014
Policy_Beta: 0.1
Policy_Entropy: 0.136
Policy_KL: 0.00117
Policy_SD: 0.837
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 2.41e+06
VF_0_ExplainedVarNew: 0.939
VF_0_ExplainedVarOld: 

tracking_error |  0.0089 |  0.0118 |  0.0000 |  1.1158
optflow_error |  0.0136 |  0.4350 |  0.0000 | 51.9408
pixel_icoords |   -0.01    0.83 |    6.71    6.67 |  -15.91  -17.41 |   19.06   16.54
theta_cv |    0.05 |    0.04 |    0.00 |    0.29
steps    |  193.25 |    6.88 |  179.00 |  222.00
***** Episode 12710, Mean R = 186.7  Std R = 6.2  Min R = 175.7
PolicyLoss: -0.00105
Policy_Beta: 0.1
Policy_Entropy: 0.137
Policy_KL: 0.000562
Policy_SD: 0.833
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 2.46e+06
VF_0_ExplainedVarNew: 0.953
VF_0_ExplainedVarOld: 0.952
VF_0_Loss : 0.121


ADV1:  0.0027764454594982105 0.04492269912395934 0.3654283865630198 -0.46797143970705923
ADV2:  -0.008172481705957486 0.7113572401273063 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.5853   2.5432  11.9065  14.8186   5.4349   4.5122
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0177   0.0089   0.0398   1.2858   0.7421   0.3504
***** Episode 12741, Mean R = 187.8  Std R = 5.8  Min R = 176.8

***** Episode 13020, Mean R = 186.4  Std R = 6.8  Min R = 177.1
PolicyLoss: -0.00243
Policy_Beta: 0.1
Policy_Entropy: 0.139
Policy_KL: 0.00113
Policy_SD: 0.846
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 2.52e+06
VF_0_ExplainedVarNew: 0.941
VF_0_ExplainedVarOld: 0.943
VF_0_Loss : 0.128


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.48934556e-13 2.68884927e-12] 3.0628792361540385e-12
ADV1:  0.00026454811332958056 0.039297833927000474 0.36176768758549643 -0.4480258546852352
ADV2:  0.005707334088063751 0.7105631941000923 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5144   0.8132   4.6340  23.1107   7.1162   7.2677
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0241   0.0125   0.0435   1.2858   0.7421   0.3504
***** Episode 13051, Mean R = 187.4  Std R = 5.3  Min R = 177.3
PolicyLoss: -0.00882
Policy_Beta: 0.1
Policy_Entropy: 0.131
Policy_KL: 0.00125
P

theta_cv |    0.05 |    0.04 |    0.00 |    0.25
steps    |  193.89 |    6.98 |  180.00 |  212.00
***** Episode 13330, Mean R = 189.4  Std R = 8.6  Min R = 176.7
PolicyLoss: 0.00252
Policy_Beta: 0.1
Policy_Entropy: 0.131
Policy_KL: 0.00107
Policy_SD: 0.831
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 2.58e+06
VF_0_ExplainedVarNew: 0.916
VF_0_ExplainedVarOld: 0.935
VF_0_Loss : 0.239


ADV1:  0.0019049580331461239 0.044229396807497746 0.4064727519345034 -0.47903781273954005
ADV2:  -0.001479827764020214 0.6930947722002679 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2913   0.6718   3.3502  23.1107   7.1162   7.2677
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0183   0.0087   0.0354   1.2858   0.7421   0.3504
***** Episode 13361, Mean R = 188.6  Std R = 7.9  Min R = 174.8
PolicyLoss: -0.0112
Policy_Beta: 0.1
Policy_Entropy: 0.131
Policy_KL: 0.00142
Policy_SD: 0.833
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 2.59e+06
VF_0_ExplainedVarNew: 0.933
VF_0_ExplainedVar

tracking_error |  0.0072 |  0.0081 |  0.0000 |  0.7487
optflow_error |  0.0097 |  0.2594 |  0.0000 | 35.0573
pixel_icoords |   -0.38    0.15 |    7.08    6.85 |  -17.42  -15.03 |   16.21   16.66
theta_cv |    0.05 |    0.04 |    0.00 |    0.23
steps    |  193.26 |    7.34 |  180.00 |  214.00
***** Episode 13640, Mean R = 188.6  Std R = 7.1  Min R = 173.9
PolicyLoss: -0.0118
Policy_Beta: 0.1
Policy_Entropy: 0.131
Policy_KL: 0.00106
Policy_SD: 0.835
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 2.64e+06
VF_0_ExplainedVarNew: 0.937
VF_0_ExplainedVarOld: 0.939
VF_0_Loss : 0.198


ADV1:  0.0024002895603815583 0.043965951666292094 0.39753057711185624 -0.38212787578921836
ADV2:  -0.027685348819012235 0.7122703809465979 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.4156   1.8037   9.1337  23.1107   7.1162   7.2677
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0548   0.0293   0.1054   1.2858   0.7421   0.3504
***** Episode 13671, Mean R = 189.8  Std R = 6.0  Min R = 178.8

theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  192.95 |    6.97 |  179.00 |  213.00
***** Episode 13950, Mean R = 189.2  Std R = 7.7  Min R = 174.9
PolicyLoss: -0.00855
Policy_Beta: 0.1
Policy_Entropy: 0.135
Policy_KL: 0.000918
Policy_SD: 0.839
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 2.7e+06
VF_0_ExplainedVarNew: 0.957
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.208


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.48934556e-13 2.68884927e-12] 3.0628792361540385e-12
ADV1:  0.004089854569924574 0.0393955307068935 0.3766287607055795 -0.4075342638112571
ADV2:  -0.037266138233116274 0.679614794814266 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.0770   1.3773   6.9937  23.1107   7.1162   7.2677
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0189   0.0107   0.0407   1.2858   0.7421   0.3504
***** Episode 13981, Mean R = 189.8  Std R = 8.3  Min R

tracking_error |  0.0079 |  0.0093 |  0.0000 |  1.0732
optflow_error |  0.0100 |  0.3235 |  0.0000 | 50.2952
pixel_icoords |   -0.19   -0.24 |    6.32    6.60 |  -17.82  -15.45 |   15.21   15.84
theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  192.88 |    7.27 |  180.00 |  216.00
***** Episode 14260, Mean R = 189.7  Std R = 7.1  Min R = 176.3
PolicyLoss: -0.015
Policy_Beta: 0.1
Policy_Entropy: 0.134
Policy_KL: 0.00165
Policy_SD: 0.841
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 2.76e+06
VF_0_ExplainedVarNew: 0.948
VF_0_ExplainedVarOld: 0.946
VF_0_Loss : 0.173


ADV1:  -0.0007443591379945569 0.045070164385475925 0.40331695239191967 -0.5228388170411997
ADV2:  0.011102756253709918 0.7275596334738418 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.7933   1.9176   8.4692  30.8904  11.3286  10.4707
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0123   0.0050   0.0210   1.2858   0.7421   0.3504
***** Episode 14291, Mean R = 189.3  Std R = 7.3  Min R = 178.2
Poli

***** Episode 14570, Mean R = 189.8  Std R = 8.8  Min R = 178.4
PolicyLoss: -0.0118
Policy_Beta: 0.1
Policy_Entropy: 0.136
Policy_KL: 0.00106
Policy_SD: 0.849
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 2.82e+06
VF_0_ExplainedVarNew: 0.939
VF_0_ExplainedVarOld: 0.94
VF_0_Loss : 0.169


ADV1:  0.0028662560577079705 0.04361228009269256 0.5610676407814026 -0.4547846421511967
ADV2:  -0.02860301648474276 0.6927330387302221 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.0088   1.4443   7.5798  30.8904  11.3286  10.4707
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0426   0.0249   0.0934   1.2858   0.7421   0.3504
***** Episode 14601, Mean R = 191.3  Std R = 7.3  Min R = 174.6
PolicyLoss: 0.0157
Policy_Beta: 0.1
Policy_Entropy: 0.137
Policy_KL: 0.000696
Policy_SD: 0.852
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 2.83e+06
VF_0_ExplainedVarNew: 0.934
VF_0_ExplainedVarOld: 0.932
VF_0_Loss : 0.202


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13

optflow_error |  0.0097 |  0.3246 |  0.0000 | 41.8657
pixel_icoords |    0.34    0.56 |    6.68    6.63 |  -16.58  -16.52 |   15.07   16.20
theta_cv |    0.05 |    0.04 |    0.00 |    0.28
steps    |  192.71 |    7.10 |  179.00 |  217.00
***** Episode 14880, Mean R = 190.3  Std R = 7.0  Min R = 176.9
PolicyLoss: -0.0132
Policy_Beta: 0.1
Policy_Entropy: 0.137
Policy_KL: 0.000839
Policy_SD: 0.845
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 2.88e+06
VF_0_ExplainedVarNew: 0.956
VF_0_ExplainedVarOld: 0.955
VF_0_Loss : 0.141


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.48934556e-13 2.68884927e-12] 3.0628792361540385e-12
ADV1:  0.004131560343821982 0.03390091011718255 0.2925288436864567 -0.29536930729763344
ADV2:  -0.03235925489089971 0.7146637488479247 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.6685   1.9758   8.4634  30.8904  11.3286  10.4707
ValFun  Grad

***** Episode 15190, Mean R = 189.5  Std R = 9.1  Min R = 177.9
PolicyLoss: -0.0124
Policy_Beta: 0.1
Policy_Entropy: 0.135
Policy_KL: 0.000818
Policy_SD: 0.848
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 2.94e+06
VF_0_ExplainedVarNew: 0.945
VF_0_ExplainedVarOld: 0.94
VF_0_Loss : 0.198


ADV1:  -0.005136417723103709 0.04196509210789665 0.2873311879136634 -0.5934981928246964
ADV2:  0.04557235590994971 0.6808864188836019 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.7763   1.2226   6.5556  30.8904  11.3286  10.4707
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0383   0.0178   0.0724   1.2858   0.7421   0.3504
***** Episode 15221, Mean R = 190.0  Std R = 7.7  Min R = 177.5
PolicyLoss: -0.0296
Policy_Beta: 0.1
Policy_Entropy: 0.137
Policy_KL: 0.00167
Policy_SD: 0.841
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 2.95e+06
VF_0_ExplainedVarNew: 0.937
VF_0_ExplainedVarOld: 0.938
VF_0_Loss : 0.168


ADV1:  0.00453191836332262 0.032821838155029315 0.2402164186593006 -0.3593

theta_cv |    0.05 |    0.04 |    0.00 |    0.20
steps    |  192.44 |    7.38 |  180.00 |  224.00
***** Episode 15500, Mean R = 186.5  Std R = 5.5  Min R = 175.3
PolicyLoss: -0.0372
Policy_Beta: 0.1
Policy_Entropy: 0.138
Policy_KL: 0.00122
Policy_SD: 0.845
Policy_lr_mult: 1
Steps: 5.91e+03
TotalSteps: 3e+06
VF_0_ExplainedVarNew: 0.941
VF_0_ExplainedVarOld: 0.939
VF_0_Loss : 0.0788


ADV1:  0.013883995935775068 0.046422331423749644 0.32089650630664124 -0.3278709931982262
ADV2:  -0.03322942967955895 0.8694880339832424 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  18.4480  12.0199  39.3504  39.3504  18.4480  12.0199
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0543   0.0351   0.1299   1.2858   0.7421   0.3504
***** Episode 15531, Mean R = 190.3  Std R = 7.2  Min R = 179.0
PolicyLoss: 0.0156
Policy_Beta: 0.1
Policy_Entropy: 0.139
Policy_KL: 0.00487
Policy_SD: 0.85
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 3.01e+06
VF_0_ExplainedVarNew: 0.932
VF_0_ExplainedVarOld: 0

tracking_error |  0.0060 |  0.0068 |  0.0000 |  0.6975
optflow_error |  0.0074 |  0.2365 |  0.0000 | 33.7461
pixel_icoords |   -0.03    0.49 |    6.86    7.03 |  -15.35  -15.50 |   15.78   17.09
theta_cv |    0.05 |    0.04 |    0.00 |    0.20
steps    |  193.38 |    7.25 |  179.00 |  214.00
***** Episode 15810, Mean R = 190.2  Std R = 8.0  Min R = 178.1
PolicyLoss: -0.0325
Policy_Beta: 0.1
Policy_Entropy: 0.137
Policy_KL: 0.00141
Policy_SD: 0.839
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 3.06e+06
VF_0_ExplainedVarNew: 0.956
VF_0_ExplainedVarOld: 0.954
VF_0_Loss : 0.247


ADV1:  0.0010868166395177487 0.040947220567885766 0.31331137923691466 -0.45530011883409127
ADV2:  0.014560026052307024 0.7007281855808036 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3742   0.7124   3.3090  39.3504  18.4480  12.0199
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0163   0.0078   0.0307   1.2858   0.7421   0.3504
***** Episode 15841, Mean R = 191.2  Std R = 8.9  Min R = 177.2


optflow_error |  0.0074 |  0.2417 |  0.0000 | 30.9740
pixel_icoords |   -0.39   -0.07 |    6.66    6.66 |  -15.88  -17.01 |   16.80   15.30
theta_cv |    0.05 |    0.04 |    0.00 |    0.19
steps    |  193.50 |    7.16 |  179.00 |  217.00
***** Episode 16120, Mean R = 191.2  Std R = 7.2  Min R = 178.0
PolicyLoss: 0.00621
Policy_Beta: 0.1
Policy_Entropy: 0.138
Policy_KL: 0.00119
Policy_SD: 0.837
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 3.12e+06
VF_0_ExplainedVarNew: 0.957
VF_0_ExplainedVarOld: 0.954
VF_0_Loss : 0.229


ADV1:  0.0007080302670256453 0.03464811967689171 0.22256630322610194 -0.39327624044613385
ADV2:  0.016842863352678938 0.7212474936296075 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.7678   2.7144  12.0158  39.3504  18.4480  12.0199
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0115   0.0064   0.0228   1.2858   0.7421   0.3504
***** Episode 16151, Mean R = 192.5  Std R = 7.2  Min R = 176.8
PolicyLoss: -0.0259
Policy_Beta: 0.1
Policy_Entropy: 0.1

***** Episode 16430, Mean R = 189.6  Std R = 7.5  Min R = 177.8
PolicyLoss: -0.0112
Policy_Beta: 0.1
Policy_Entropy: 0.138
Policy_KL: 0.00103
Policy_SD: 0.847
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 3.18e+06
VF_0_ExplainedVarNew: 0.949
VF_0_ExplainedVarOld: 0.949
VF_0_Loss : 0.175


ADV1:  0.008725134169881823 0.04195979518394193 0.4262491345405579 -0.49524831771850586
ADV2:  -0.05534579288386045 0.7749840909315538 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.7442   2.4486  10.9288  39.3504  18.4480  12.0199
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0492   0.0261   0.1069   1.2858   0.7421   0.3504
***** Episode 16461, Mean R = 193.1  Std R = 7.7  Min R = 179.6
PolicyLoss: 0.0151
Policy_Beta: 0.1
Policy_Entropy: 0.143
Policy_KL: 0.000669
Policy_SD: 0.853
Policy_lr_mult: 1
Steps: 6.07e+03
TotalSteps: 3.19e+06
VF_0_ExplainedVarNew: 0.939
VF_0_ExplainedVarOld: 0.936
VF_0_Loss : 0.179


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.1

theta_cv |    0.05 |    0.04 |    0.00 |    0.18
steps    |  193.52 |    6.95 |  180.00 |  218.00
***** Episode 16740, Mean R = 189.3  Std R = 6.5  Min R = 177.3
PolicyLoss: -0.0204
Policy_Beta: 0.1
Policy_Entropy: 0.137
Policy_KL: 0.0013
Policy_SD: 0.838
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 3.24e+06
VF_0_ExplainedVarNew: 0.956
VF_0_ExplainedVarOld: 0.957
VF_0_Loss : 0.0996


ADV1:  0.002532939907202746 0.036097799671296296 0.2911321970000008 -0.36687439379252984
ADV2:  -0.004716600155893694 0.7083114692442442 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5049   0.7980   4.2771  39.3504  18.4480  12.0199
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0193   0.0095   0.0376   1.2858   0.7421   0.3504
***** Episode 16771, Mean R = 189.9  Std R = 6.1  Min R = 177.8
PolicyLoss: -0.00668
Policy_Beta: 0.1
Policy_Entropy: 0.139
Policy_KL: 0.000921
Policy_SD: 0.848
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 3.25e+06
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVa

theta_cv |    0.05 |    0.04 |    0.00 |    0.19
steps    |  192.94 |    6.69 |  181.00 |  212.00
***** Episode 17050, Mean R = 190.8  Std R = 5.8  Min R = 180.1
PolicyLoss: -0.0198
Policy_Beta: 0.1
Policy_Entropy: 0.14
Policy_KL: 0.000563
Policy_SD: 0.844
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 3.3e+06
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 0.0932


ADV1:  0.0006361327419857561 0.03123087966596068 0.19850871844663165 -0.39423253269699843
ADV2:  0.011166553911011042 0.7280345936911565 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.7091   2.3730   9.6839  39.3504  18.4480  12.0199
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0064   0.0030   0.0111   1.2858   0.7421   0.3504
***** Episode 17081, Mean R = 192.3  Std R = 8.1  Min R = 176.7
PolicyLoss: -0.0215
Policy_Beta: 0.1
Policy_Entropy: 0.138
Policy_KL: 0.00108
Policy_SD: 0.846
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 3.31e+06
VF_0_ExplainedVarNew: 0.958
VF_0_ExplainedVarO

***** Episode 17360, Mean R = 190.8  Std R = 8.1  Min R = 180.3
PolicyLoss: -0.0226
Policy_Beta: 0.1
Policy_Entropy: 0.139
Policy_KL: 0.00143
Policy_SD: 0.849
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 3.36e+06
VF_0_ExplainedVarNew: 0.952
VF_0_ExplainedVarOld: 0.948
VF_0_Loss : 0.144


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.48934556e-13 2.68884927e-12] 3.0628792361540385e-12
ADV1:  -0.003228156462215269 0.03335680302419644 0.2839733185423089 -0.30690743761697625
ADV2:  0.00880757615106305 0.7267151214211576 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.1762   1.4320   7.3479  44.0068  26.0980  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0118   0.0075   0.0255   1.2858   0.7421   0.3504
***** Episode 17391, Mean R = 189.4  Std R = 6.5  Min R = 175.7
PolicyLoss: 0.00337
Policy_Beta: 0.1
Policy_Entropy: 0.138
Policy_KL: 0.000964
Polic

pixel_icoords |   -0.19   -0.19 |    6.89    6.54 |  -14.95  -15.03 |   16.46   16.25
theta_cv |    0.05 |    0.04 |    0.00 |    0.19
steps    |  193.07 |    7.54 |  179.00 |  214.00
***** Episode 17670, Mean R = 188.6  Std R = 7.4  Min R = 177.6
PolicyLoss: 0.00634
Policy_Beta: 0.1
Policy_Entropy: 0.144
Policy_KL: 0.000854
Policy_SD: 0.854
Policy_lr_mult: 1
Steps: 5.92e+03
TotalSteps: 3.42e+06
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.956
VF_0_Loss : 0.131


ADV1:  0.002390646886077515 0.03803300568502072 0.32669593411512277 -0.4005168782490148
ADV2:  -0.018583952234237606 0.6977530910850688 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.4158   2.8505  13.6867  44.0068  26.0980  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0135   0.0070   0.0262   1.2858   0.7421   0.3504
***** Episode 17701, Mean R = 192.2  Std R = 6.8  Min R = 175.5
PolicyLoss: 0.00367
Policy_Beta: 0.1
Policy_Entropy: 0.139
Policy_KL: 0.00147
Policy_SD: 0.852
Policy_lr_mult:

***** Episode 17980, Mean R = 189.9  Std R = 7.4  Min R = 176.6
PolicyLoss: -0.027
Policy_Beta: 0.1
Policy_Entropy: 0.138
Policy_KL: 0.000853
Policy_SD: 0.848
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 3.48e+06
VF_0_ExplainedVarNew: 0.954
VF_0_ExplainedVarOld: 0.955
VF_0_Loss : 0.0691


ADV1:  0.0030768154819177137 0.0340320079743201 0.2565402090056803 -0.29842990342345366
ADV2:  -0.019171592624517352 0.7783287096226764 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.8105   1.9018   9.0962  44.0068  26.0980  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0218   0.0164   0.0579   1.2858   0.7421   0.3504
***** Episode 18011, Mean R = 189.7  Std R = 7.9  Min R = 177.6
PolicyLoss: 0.0035
Policy_Beta: 0.1
Policy_Entropy: 0.139
Policy_KL: 0.000945
Policy_SD: 0.847
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 3.49e+06
VF_0_ExplainedVarNew: 0.953
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 0.156


ADV1:  0.0030463702877113396 0.03558443818601314 0.28807415379249723

theta_cv |    0.05 |    0.04 |    0.00 |    0.20
steps    |  192.59 |    6.77 |  180.00 |  211.00
***** Episode 18290, Mean R = 192.0  Std R = 10.1  Min R = 178.8
PolicyLoss: -0.0142
Policy_Beta: 0.1
Policy_Entropy: 0.133
Policy_KL: 0.000672
Policy_SD: 0.844
Policy_lr_mult: 1
Steps: 5.93e+03
TotalSteps: 3.54e+06
VF_0_ExplainedVarNew: 0.943
VF_0_ExplainedVarOld: 0.94
VF_0_Loss : 0.157


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.48957513e-13 2.68884927e-12] 3.0628811020422756e-12
ADV1:  -0.0018714386420640543 0.03533995976280377 0.3739318195820701 -0.36524771147735413
ADV2:  -0.0034950643803139682 0.7277058461420133 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.6601   1.4377   7.8158  44.0068  26.0980  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0067   0.0026   0.0116   1.2858   0.7421   0.3504
***** Episode 18321, Mean R = 191.3  Std R = 

theta_cv |    0.05 |    0.04 |    0.00 |    0.20
steps    |  192.82 |    6.95 |  179.00 |  214.00
***** Episode 18600, Mean R = 193.5  Std R = 8.8  Min R = 180.2
PolicyLoss: -0.00207
Policy_Beta: 0.1
Policy_Entropy: 0.135
Policy_KL: 0.00086
Policy_SD: 0.846
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 3.6e+06
VF_0_ExplainedVarNew: 0.953
VF_0_ExplainedVarOld: 0.951
VF_0_Loss : 0.237


ADV1:  -0.002446517539152192 0.03825797046138043 0.30474601452756434 -0.3516398843705166
ADV2:  0.01339476744175518 0.7177968383280606 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8072   0.5581   2.1893  44.0068  26.0980  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0054   0.0022   0.0104   1.2858   0.7421   0.3504
***** Episode 18631, Mean R = 193.7  Std R = 9.2  Min R = 177.4
PolicyLoss: -0.00136
Policy_Beta: 0.1
Policy_Entropy: 0.133
Policy_KL: 0.000413
Policy_SD: 0.843
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 3.61e+06
VF_0_ExplainedVarNew: 0.943
VF_0_ExplainedVarO

pixel_icoords |    0.12    0.77 |    6.82    6.61 |  -15.98  -15.68 |   15.86   16.38
theta_cv |    0.05 |    0.04 |    0.00 |    0.19
steps    |  193.73 |    7.26 |  180.00 |  213.00
***** Episode 18910, Mean R = 192.3  Std R = 5.7  Min R = 181.0
PolicyLoss: -0.0246
Policy_Beta: 0.1
Policy_Entropy: 0.137
Policy_KL: 0.000781
Policy_SD: 0.845
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 3.66e+06
VF_0_ExplainedVarNew: 0.958
VF_0_ExplainedVarOld: 0.957
VF_0_Loss : 0.0946


ADV1:  -0.0012683599447746619 0.035246707868970543 0.386095255613327 -0.2882465365905498
ADV2:  -0.0011821294138611477 0.7566709612315683 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.2926   2.3026  11.7768  44.0068  26.0980  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0195   0.0097   0.0364   1.2858   0.7421   0.3504
***** Episode 18941, Mean R = 193.6  Std R = 7.8  Min R = 181.0
PolicyLoss: 0.00617
Policy_Beta: 0.1
Policy_Entropy: 0.134
Policy_KL: 0.000852
Policy_SD: 0.844
Policy_lr_m

theta_cv |    0.05 |    0.04 |    0.00 |    0.21
steps    |  194.22 |    7.35 |  180.00 |  218.00
***** Episode 19220, Mean R = 193.2  Std R = 7.9  Min R = 176.7
PolicyLoss: -0.0303
Policy_Beta: 0.1
Policy_Entropy: 0.138
Policy_KL: 0.00115
Policy_SD: 0.85
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 3.72e+06
VF_0_ExplainedVarNew: 0.942
VF_0_ExplainedVarOld: 0.938
VF_0_Loss : 0.174


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.48957513e-13 2.68884927e-12] 3.0628811020422756e-12
ADV1:  0.004454649861042244 0.03141424338972161 0.2530770592581155 -0.268390880962649
ADV2:  -0.03532130154489281 0.7842111284538462 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.7253   3.2466  16.0111  44.4876  26.0980  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0372   0.0254   0.1027   1.2858   0.7421   0.3504
***** Episode 19251, Mean R = 195.1  Std R = 8.4  Min

a_f      |   -0.01    0.01 |    0.27    1.71 |   -0.67   -3.12 |    0.55    3.09
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    2.94 |    4.55 |    0.00 |   10.00
tracking_rewards |  190.46 |    7.22 |  175.07 |  210.38
tracking_error |  0.0057 |  0.0055 |  0.0000 |  0.5601
optflow_error |  0.0048 |  0.1429 |  0.0000 | 26.6834
pixel_icoords |   -0.13    0.31 |    6.63    6.93 |  -18.30  -16.70 |   14.86   17.42
theta_cv |    0.05 |    0.04 |    0.00 |    0.21
steps    |  193.56 |    7.27 |  179.00 |  213.00
***** Episode 19530, Mean R = 192.3  Std R = 8.1  Min R = 178.6
PolicyLoss: 0.00448
Policy_Beta: 0.1
Policy_Entropy: 0.141
Policy_KL: 0.00142
Policy_SD: 0.856
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 3.78e+06
VF_0_ExplainedVarNew: 0.959
VF_0_Expla

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.00    0.01 |    0.28    1.77 |   -0.62   -3.05 |    0.69    3.14
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    3.23 |    4.67 |    0.00 |   10.00
tracking_rewards |  190.06 |    7.25 |  176.33 |  214.13
tracking_error |  0.0054 |  0.0049 |  0.0000 |  0.4727
optflow_error |  0.0046 |  0.1201 |  0.0000 | 22.1548
pixel_icoords |    0.04   -0.25 |    6.90    7.00 |  -16.38  -17.42 |   16.96   16.23
theta_cv |    0.05 |    0.04 |    0.00 |    0.19
steps    |  193.10 |    7.31 |  180.00 |  217.00
***** Episode 19840, Mean R = 192.8  Std R = 8.4  Min R = 178.0
PolicyLoss: -0.0183
Policy_Beta: 0.1
Policy_Entropy: 0.15
Policy_KL: 0.0007

optflow_error |  0.0038 |  0.0900 |  0.0000 | 14.9925
pixel_icoords |    0.24    0.59 |    6.59    6.82 |  -17.72  -17.03 |   15.51   16.58
theta_cv |    0.05 |    0.04 |    0.00 |    0.21
steps    |  193.02 |    7.03 |  180.00 |  223.00
***** Episode 20150, Mean R = 193.3  Std R = 8.2  Min R = 176.2
PolicyLoss: -0.0106
Policy_Beta: 0.1
Policy_Entropy: 0.149
Policy_KL: 0.00144
Policy_SD: 0.854
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 3.9e+06
VF_0_ExplainedVarNew: 0.957
VF_0_ExplainedVarOld: 0.957
VF_0_Loss : 0.15


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68884927e-12] 3.0631211278171617e-12
ADV1:  -0.0026258935286286767 0.035005167575220285 0.3113890860261541 -0.24464340523654
ADV2:  -0.0014206909262040256 0.7017445837199487 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7499   1.0513   4.0036  44.4876  26.0980  16.3562
ValFun  Gradi

theta_cv |    0.05 |    0.04 |    0.00 |    0.20
steps    |  193.01 |    7.27 |  180.00 |  213.00
***** Episode 20460, Mean R = 192.3  Std R = 7.5  Min R = 179.6
PolicyLoss: -0.00862
Policy_Beta: 0.1
Policy_Entropy: 0.15
Policy_KL: 0.00147
Policy_SD: 0.854
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 3.96e+06
VF_0_ExplainedVarNew: 0.937
VF_0_ExplainedVarOld: 0.943
VF_0_Loss : 0.119


ADV1:  0.012944947018775306 0.03846768904330957 0.2895918690062774 -0.2358424023465856
ADV2:  -0.04949178740005638 0.8227819717568875 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.7593   2.4456  11.4493  44.4876  26.0980  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0312   0.0149   0.0669   1.2858   0.7421   0.3504
***** Episode 20491, Mean R = 195.4  Std R = 9.2  Min R = 177.3
PolicyLoss: 0.0168
Policy_Beta: 0.1
Policy_Entropy: 0.146
Policy_KL: 0.00216
Policy_SD: 0.846
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 3.97e+06
VF_0_ExplainedVarNew: 0.95
VF_0_ExplainedVarOld: 0

tracking_error |  0.0051 |  0.0049 |  0.0000 |  0.5808
optflow_error |  0.0046 |  0.1521 |  0.0000 | 27.2455
pixel_icoords |   -0.57    0.19 |    6.61    6.91 |  -15.67  -15.86 |   16.49   14.77
theta_cv |    0.05 |    0.04 |    0.00 |    0.19
steps    |  193.16 |    7.33 |  179.00 |  215.00
***** Episode 20770, Mean R = 193.5  Std R = 8.5  Min R = 177.2
PolicyLoss: -0.0178
Policy_Beta: 0.1
Policy_Entropy: 0.16
Policy_KL: 0.000813
Policy_SD: 0.862
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 4.02e+06
VF_0_ExplainedVarNew: 0.949
VF_0_ExplainedVarOld: 0.949
VF_0_Loss : 0.194


ADV1:  -0.0022466703549554796 0.03221847527302943 0.38263692899595525 -0.3196277100517664
ADV2:  0.01411856846286779 0.7079678283699754 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.4184   2.2807  11.9269  44.4876  26.0980  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0055   0.0026   0.0123   1.2858   0.7421   0.3504
***** Episode 20801, Mean R = 192.0  Std R = 7.8  Min R = 179.1
Po

***** Episode 21080, Mean R = 194.3  Std R = 9.3  Min R = 179.0
PolicyLoss: 0.0219
Policy_Beta: 0.1
Policy_Entropy: 0.158
Policy_KL: 0.000927
Policy_SD: 0.86
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 4.08e+06
VF_0_ExplainedVarNew: 0.948
VF_0_ExplainedVarOld: 0.947
VF_0_Loss : 0.205


ADV1:  0.001116204786942478 0.03416086869918128 0.20328212724071149 -0.3179851461727086
ADV2:  0.012702831843529687 0.7696928858913418 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.8991   1.6264   6.4949  45.8351  26.0980  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0083   0.0046   0.0178   1.2858   0.7421   0.3504
***** Episode 21111, Mean R = 193.6  Std R = 10.0  Min R = 177.1
PolicyLoss: -0.0221
Policy_Beta: 0.1
Policy_Entropy: 0.154
Policy_KL: 0.000992
Policy_SD: 0.854
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 4.09e+06
VF_0_ExplainedVarNew: 0.954
VF_0_ExplainedVarOld: 0.955
VF_0_Loss : 0.147


ADV1:  0.003936014269732508 0.03505026890841804 0.3711693111671749 -0

tracking_error |  0.0052 |  0.0047 |  0.0000 |  0.5226
optflow_error |  0.0046 |  0.1393 |  0.0000 | 24.3888
pixel_icoords |   -0.18    0.29 |    7.08    6.76 |  -16.89  -16.38 |   15.11   14.49
theta_cv |    0.05 |    0.04 |    0.00 |    0.20
steps    |  192.92 |    7.27 |  180.00 |  213.00
***** Episode 21390, Mean R = 195.4  Std R = 8.7  Min R = 178.8
PolicyLoss: -0.00741
Policy_Beta: 0.1
Policy_Entropy: 0.164
Policy_KL: 0.00066
Policy_SD: 0.866
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 4.14e+06
VF_0_ExplainedVarNew: 0.96
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.229


ADV1:  0.004916041169259048 0.03458664762808456 0.3253179995870137 -0.27698738266629114
ADV2:  -0.04742789455676682 0.7201299774197708 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.0436   3.5811  19.3636  52.6418  30.7751  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0134   0.0071   0.0257   1.2858   0.7421   0.3504
***** Episode 21421, Mean R = 195.7  Std R = 7.5  Min R = 182.9
Poli

theta_cv |    0.05 |    0.04 |    0.00 |    0.19
steps    |  193.01 |    6.68 |  180.00 |  215.00
***** Episode 21700, Mean R = 194.8  Std R = 7.3  Min R = 178.6
PolicyLoss: -0.0141
Policy_Beta: 0.1
Policy_Entropy: 0.158
Policy_KL: 0.000882
Policy_SD: 0.863
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 4.2e+06
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.218


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68890964e-12] 3.063174122265867e-12
ADV1:  0.005018582092100821 0.03540879747905486 0.29149583574310556 -0.27883775869197003
ADV2:  -0.026957625258889742 0.8105607380404268 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.8168   4.2854  17.4343  52.6418  30.7751  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0221   0.0114   0.0475   1.2858   0.7421   0.3504
***** Episode 21731, Mean R = 194.0  Std R = 7.5 

optflow_error |  0.0044 |  0.0823 |  0.0000 | 13.5821
pixel_icoords |   -0.09   -0.33 |    7.00    6.87 |  -15.57  -15.59 |   17.31   17.05
theta_cv |    0.05 |    0.03 |    0.00 |    0.21
steps    |  193.92 |    7.54 |  180.00 |  217.00
***** Episode 22010, Mean R = 194.5  Std R = 7.7  Min R = 178.8
PolicyLoss: -0.0204
Policy_Beta: 0.1
Policy_Entropy: 0.17
Policy_KL: 0.00147
Policy_SD: 0.862
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 4.26e+06
VF_0_ExplainedVarNew: 0.951
VF_0_ExplainedVarOld: 0.953
VF_0_Loss : 0.206


ADV1:  0.003388127273501755 0.033661479787302294 0.270161680745178 -0.24534620441730912
ADV2:  -0.01671336018728616 0.790986091284254 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.2936   2.3268   9.7978  52.6418  30.7751  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0076   0.0030   0.0136   1.2858   0.7421   0.3504
***** Episode 22041, Mean R = 195.0  Std R = 7.9  Min R = 180.8
PolicyLoss: -7e-06
Policy_Beta: 0.1
Policy_Entropy: 0.17
Pol

theta_cv |    0.05 |    0.04 |    0.00 |    0.20
steps    |  193.29 |    6.96 |  179.00 |  214.00
***** Episode 22320, Mean R = 192.4  Std R = 7.4  Min R = 179.9
PolicyLoss: -0.02
Policy_Beta: 0.1
Policy_Entropy: 0.174
Policy_KL: 0.00588
Policy_SD: 0.855
Policy_lr_mult: 1
Steps: 5.92e+03
TotalSteps: 4.32e+06
VF_0_ExplainedVarNew: 0.946
VF_0_ExplainedVarOld: 0.946
VF_0_Loss : 0.132


ADV1:  0.004701041259975854 0.0319291558873701 0.24867844350318324 -0.23403875903876986
ADV2:  -0.05153449722131225 0.7431879521016296 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.4262   3.2149  16.3352  52.6418  30.7751  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0499   0.0249   0.1176   1.2858   0.7421   0.3504
***** Episode 22351, Mean R = 196.5  Std R = 8.2  Min R = 186.3
PolicyLoss: 0.017
Policy_Beta: 0.1
Policy_Entropy: 0.167
Policy_KL: 0.000613
Policy_SD: 0.86
Policy_lr_mult: 1
Steps: 6.06e+03
TotalSteps: 4.33e+06
VF_0_ExplainedVarNew: 0.952
VF_0_ExplainedVarOld: 0.

optflow_error |  0.0045 |  0.1109 |  0.0000 | 14.6259
pixel_icoords |    0.78   -0.06 |    6.87    6.70 |  -15.85  -16.68 |   16.34   14.54
theta_cv |    0.05 |    0.04 |    0.00 |    0.19
steps    |  192.95 |    6.98 |  180.00 |  215.00
***** Episode 22630, Mean R = 191.9  Std R = 8.5  Min R = 178.1
PolicyLoss: -0.00766
Policy_Beta: 0.1
Policy_Entropy: 0.169
Policy_KL: 0.00159
Policy_SD: 0.851
Policy_lr_mult: 1
Steps: 5.93e+03
TotalSteps: 4.38e+06
VF_0_ExplainedVarNew: 0.936
VF_0_ExplainedVarOld: 0.933
VF_0_Loss : 0.13


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68890964e-12] 3.063174122265867e-12
ADV1:  0.005855346107260035 0.03589292292593787 0.3151991575461287 -0.28598388847135514
ADV2:  -0.04399674649768279 0.7642515688372872 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5681   1.1056   5.6321  52.6418  30.7751  16.3562
ValFun  Gradie

a_f      |   -0.00   -0.11 |    0.26    1.75 |   -0.59   -3.10 |    0.59    3.10
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    4.39 |    4.96 |    0.00 |   10.00
tracking_rewards |  189.99 |    6.95 |  176.71 |  212.05
tracking_error |  0.0058 |  0.0060 |  0.0000 |  0.8357
optflow_error |  0.0050 |  0.2257 |  0.0000 | 39.7992
pixel_icoords |   -0.05   -0.44 |    6.57    6.74 |  -15.98  -15.92 |   15.32   16.05
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  192.75 |    6.92 |  180.00 |  215.00
***** Episode 22940, Mean R = 195.4  Std R = 10.3  Min R = 176.7
PolicyLoss: -0.0066
Policy_Beta: 0.1
Policy_Entropy: 0.163
Policy_KL: 0.000956
Policy_SD: 0.856
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 4.44e+06
VF_0_ExplainedVarNew: 0.942
VF_0_Exp

tracking_error |  0.0052 |  0.0038 |  0.0000 |  0.2469
optflow_error |  0.0038 |  0.0835 |  0.0000 | 11.7246
pixel_icoords |    0.04   -0.11 |    6.59    6.43 |  -15.32  -15.30 |   16.11   16.60
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.27 |    6.91 |  180.00 |  221.00
***** Episode 23250, Mean R = 198.7  Std R = 6.3  Min R = 180.0
PolicyLoss: -0.0122
Policy_Beta: 0.1
Policy_Entropy: 0.163
Policy_KL: 0.000604
Policy_SD: 0.85
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 4.5e+06
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.141


ADV1:  -0.007834104838252887 0.03475864475983362 0.26799867734122884 -0.2721849055630449
ADV2:  0.03685371010654187 0.7914101295424417 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.9418   2.6647  13.0888  52.6418  30.7751  16.3562
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0237   0.0149   0.0765   1.2858   0.7421   0.3504
***** Episode 23281, Mean R = 193.5  Std R = 9.4  Min R = 176.6
Poli

tracking_error |  0.0056 |  0.0045 |  0.0000 |  0.4986
optflow_error |  0.0042 |  0.1244 |  0.0000 | 23.3469
pixel_icoords |    0.10    0.33 |    6.45    6.53 |  -15.67  -15.61 |   16.50   16.98
theta_cv |    0.05 |    0.04 |    0.00 |    0.17
steps    |  192.21 |    6.63 |  180.00 |  216.00
***** Episode 23560, Mean R = 194.8  Std R = 7.9  Min R = 176.8
PolicyLoss: -0.0092
Policy_Beta: 0.1
Policy_Entropy: 0.171
Policy_KL: 0.00179
Policy_SD: 0.854
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 4.56e+06
VF_0_ExplainedVarNew: 0.96
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 0.127


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68890964e-12] 3.063174122265867e-12
ADV1:  0.007383108372438313 0.03496921671436096 0.31184837962320366 -0.2340237526755477
ADV2:  -0.047651272099274375 0.793367098315796 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.0624   3.8

optflow_error |  0.0050 |  0.2444 |  0.0000 | 34.1692
pixel_icoords |   -0.20    0.04 |    6.80    7.08 |  -17.43  -17.56 |   17.62   14.64
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  192.94 |    7.33 |  180.00 |  223.00
***** Episode 23870, Mean R = 195.6  Std R = 8.7  Min R = 177.2
PolicyLoss: -0.0129
Policy_Beta: 0.1
Policy_Entropy: 0.168
Policy_KL: 0.000256
Policy_SD: 0.858
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 4.62e+06
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.176


ADV1:  -0.0022969700962949344 0.03147146050267681 0.25247115920353114 -0.2695034774456781
ADV2:  0.01180505522684881 0.7237496528482223 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   9.3265   7.8206  25.7331  63.4107  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0074   0.0034   0.0146   1.2858   0.7421   0.3504
***** Episode 23901, Mean R = 196.8  Std R = 7.7  Min R = 180.7
PolicyLoss: 0.00357
Policy_Beta: 0.1
Policy_Entropy: 0.1

a_f      |    0.02    0.11 |    0.29    1.76 |   -0.63   -3.13 |    0.60    3.11
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    4.94 |    5.00 |    0.00 |   10.00
tracking_rewards |  191.31 |    7.26 |  176.43 |  211.96
tracking_error |  0.0050 |  0.0037 |  0.0000 |  0.1171
optflow_error |  0.0031 |  0.0450 |  0.0000 |  5.5908
pixel_icoords |    0.36   -0.31 |    7.18    7.01 |  -14.99  -16.56 |   16.41   15.11
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.92 |    7.22 |  179.00 |  215.00
***** Episode 24180, Mean R = 197.1  Std R = 8.4  Min R = 176.4
PolicyLoss: 0.00684
Policy_Beta: 0.1
Policy_Entropy: 0.168
Policy_KL: 0.000864
Policy_SD: 0.85
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 4.68e+06
VF_0_ExplainedVarNew: 0.957
VF_0_Expla

tracking_error |  0.0049 |  0.0058 |  0.0000 |  0.8904
optflow_error |  0.0046 |  0.2245 |  0.0000 | 42.8489
pixel_icoords |   -0.23    0.03 |    6.88    6.80 |  -16.11  -15.93 |   16.24   17.00
theta_cv |    0.05 |    0.03 |    0.00 |    0.20
steps    |  193.75 |    7.63 |  179.00 |  218.00
***** Episode 24490, Mean R = 199.6  Std R = 10.2  Min R = 177.9
PolicyLoss: 0.0127
Policy_Beta: 0.1
Policy_Entropy: 0.169
Policy_KL: 0.00235
Policy_SD: 0.861
Policy_lr_mult: 1
Steps: 6.09e+03
TotalSteps: 4.74e+06
VF_0_ExplainedVarNew: 0.951
VF_0_ExplainedVarOld: 0.954
VF_0_Loss : 0.245


ADV1:  0.0003513805003195328 0.05429572328270052 0.4528936368207426 -0.353545065089811
ADV2:  -0.039638895546497986 0.644354070033987 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.9667   2.5171  12.9922  63.4107  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0158   0.0082   0.0396   1.2858   0.7421   0.3504
***** Episode 24521, Mean R = 194.0  Std R = 9.3  Min R = 179.2
Poli

optflow_error |  0.0035 |  0.0878 |  0.0000 | 14.1992
pixel_icoords |    0.17   -0.19 |    7.10    6.74 |  -17.91  -17.15 |   16.03   15.53
theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  193.16 |    7.29 |  180.00 |  213.00
***** Episode 24800, Mean R = 198.1  Std R = 10.5  Min R = 179.1
PolicyLoss: -0.0198
Policy_Beta: 0.1
Policy_Entropy: 0.174
Policy_KL: 0.0012
Policy_SD: 0.865
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 4.8e+06
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.957
VF_0_Loss : 0.215


ADV1:  -0.0003541468196006389 0.030215514147791313 0.23341352104535057 -0.28204333527940384
ADV2:  0.004342688007446879 0.7827627089316284 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.5956   1.7551   7.7954  63.4107  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0060   0.0024   0.0108   1.2858   0.7421   0.3504
***** Episode 24831, Mean R = 196.8  Std R = 7.7  Min R = 179.3
PolicyLoss: -0.0044
Policy_Beta: 0.1
Policy_Entropy: 0.

a_f      |    0.01    0.04 |    0.25    1.70 |   -0.55   -3.12 |    0.56    3.13
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    4.71 |    4.99 |    0.00 |   10.00
tracking_rewards |  190.62 |    7.36 |  177.19 |  219.43
tracking_error |  0.0049 |  0.0054 |  0.0000 |  0.8270
optflow_error |  0.0040 |  0.2080 |  0.0000 | 39.6767
pixel_icoords |    0.29   -0.05 |    6.32    6.82 |  -15.23  -14.93 |   15.17   17.23
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.15 |    7.40 |  180.00 |  222.00
***** Episode 25110, Mean R = 197.1  Std R = 9.9  Min R = 179.8
PolicyLoss: -0.00236
Policy_Beta: 0.1
Policy_Entropy: 0.168
Policy_KL: 0.000892
Policy_SD: 0.848
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 4.86e+06
VF_0_ExplainedVarNew: 0.952
VF_0_Exp

a_f      |    0.01    0.22 |    0.27    1.77 |   -0.60   -3.13 |    0.64    3.14
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    5.00 |    5.00 |    0.00 |   10.00
tracking_rewards |  190.68 |    6.73 |  176.78 |  214.34
tracking_error |  0.0046 |  0.0039 |  0.0000 |  0.4497
optflow_error |  0.0036 |  0.1060 |  0.0000 | 21.6716
pixel_icoords |    0.34   -0.82 |    6.66    6.53 |  -16.01  -14.26 |   15.95   14.73
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.19 |    6.74 |  180.00 |  216.00
***** Episode 25420, Mean R = 195.6  Std R = 8.5  Min R = 179.9
PolicyLoss: -0.00964
Policy_Beta: 0.1
Policy_Entropy: 0.17
Policy_KL: 0.000991
Policy_SD: 0.86
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 4.92e+06
VF_0_ExplainedVarNew: 0.959
VF_0_Expla

tracking_error |  0.0044 |  0.0041 |  0.0000 |  0.6188
optflow_error |  0.0035 |  0.1318 |  0.0000 | 29.2646
pixel_icoords |    0.46    0.04 |    6.71    6.78 |  -16.79  -14.79 |   16.09   16.27
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.35 |    7.05 |  179.00 |  220.00
***** Episode 25730, Mean R = 197.9  Std R = 7.0  Min R = 185.0
PolicyLoss: -0.00398
Policy_Beta: 0.1
Policy_Entropy: 0.171
Policy_KL: 0.000564
Policy_SD: 0.855
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 4.98e+06
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.181


ADV1:  -0.002991620594649227 0.03043240608092592 0.2593942063945418 -0.34566582028400633
ADV2:  0.023266452243710458 0.7412836445966182 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.3989   1.9497   8.5373  63.4107  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0051   0.0030   0.0130   1.2858   0.7421   0.3504
***** Episode 25761, Mean R = 195.9  Std R = 9.1  Min R = 180.2


tracking_error |  0.0046 |  0.0034 |  0.0000 |  0.2200
optflow_error |  0.0029 |  0.0596 |  0.0000 | 10.5053
pixel_icoords |   -0.22   -0.06 |    6.74    6.80 |  -16.30  -17.32 |   15.57   14.54
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.18 |    7.09 |  180.00 |  216.00
***** Episode 26040, Mean R = 198.9  Std R = 9.6  Min R = 178.5
PolicyLoss: -0.00665
Policy_Beta: 0.1
Policy_Entropy: 0.162
Policy_KL: 0.00096
Policy_SD: 0.852
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 5.04e+06
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.213


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68890964e-12] 3.063174122265867e-12
ADV1:  -0.00202341313248844 0.026513572214563165 0.2560405436409221 -0.24945680316462238
ADV2:  0.012352742971457548 0.7806166593033896 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.7575   

w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    5.71 |    4.95 |    0.00 |   10.00
tracking_rewards |  190.35 |    6.94 |  177.17 |  211.45
tracking_error |  0.0041 |  0.0052 |  0.0000 |  1.0117
optflow_error |  0.0035 |  0.2030 |  0.0000 | 48.1141
pixel_icoords |    0.23   -0.82 |    6.66    6.37 |  -16.10  -16.67 |   14.41   15.26
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  192.73 |    6.94 |  180.00 |  214.00
***** Episode 26350, Mean R = 196.7  Std R = 8.3  Min R = 185.1
PolicyLoss: 0.028
Policy_Beta: 0.1
Policy_Entropy: 0.162
Policy_KL: 0.000642
Policy_SD: 0.852
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 5.1e+06
VF_0_ExplainedVarNew: 0.962
VF_0_ExplainedVarOld: 0.957
VF_0_Loss : 0.225


ADV1:  0.006414437312617767 0.031058952988586

tracking_rewards |  190.49 |    6.61 |  177.07 |  210.34
tracking_error |  0.0038 |  0.0028 |  0.0000 |  0.0933
optflow_error |  0.0025 |  0.0336 |  0.0000 |  4.4354
pixel_icoords |    0.52   -0.09 |    6.76    6.44 |  -15.58  -16.02 |   16.72   18.85
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  192.82 |    6.56 |  180.00 |  212.00
***** Episode 26660, Mean R = 196.4  Std R = 9.1  Min R = 178.3
PolicyLoss: -0.00292
Policy_Beta: 0.1
Policy_Entropy: 0.162
Policy_KL: 0.00051
Policy_SD: 0.847
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 5.16e+06
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.148


ADV1:  0.0035309926571645304 0.030480677080526087 0.27044277809321726 -0.2768151471575776
ADV2:  -0.012840995245817 0.8007014850919393 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.7266   1.7592   8.9985  63.4107  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0072   0.0045   0.0187   1.2858   0.7421   0.3504
***** Epi

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |   -0.00    0.01 |    0.27    1.73 |   -0.69   -3.14 |    0.59    3.13
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    6.23 |    4.85 |    0.00 |   10.00
tracking_rewards |  191.16 |    6.76 |  176.69 |  215.17
tracking_error |  0.0041 |  0.0053 |  0.0000 |  0.9179
optflow_error |  0.0038 |  0.2143 |  0.0000 | 43.9925
pixel_icoords |   -0.14   -0.12 |    6.89    7.16 |  -17.57  -16.26 |   16.68   16.18
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.47 |    6.85 |  180.00 |  218.00
***** Episode 26970, Mean R = 197.8  Std R = 7.5  Min R = 179.1
PolicyLoss: -0.00147
Policy_Beta: 0.1
Policy_Entropy: 0.163
Policy_KL: 0.00

tracking_error |  0.0041 |  0.0033 |  0.0000 |  0.2432
optflow_error |  0.0031 |  0.0839 |  0.0000 | 11.9173
pixel_icoords |   -0.05   -0.03 |    7.05    6.91 |  -16.85  -17.35 |   15.75   15.98
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  194.19 |    7.46 |  180.00 |  218.00
***** Episode 27280, Mean R = 197.7  Std R = 8.9  Min R = 179.3
PolicyLoss: -0.0183
Policy_Beta: 0.1
Policy_Entropy: 0.166
Policy_KL: 0.00106
Policy_SD: 0.852
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 5.28e+06
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.175


ADV1:  -0.006746442709005279 0.028937443572121153 0.2110805988311768 -0.31109110478199975
ADV2:  0.044605226861748025 0.7921738146637314 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  10.2342   8.7679  31.2133  63.4107  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0094   0.0048   0.0213   1.2858   0.7421   0.3504
***** Episode 27311, Mean R = 195.7  Std R = 7.3  Min R = 183.7
Po

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.01    0.00 |    0.29    1.84 |   -0.63   -3.11 |    0.67    3.11
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    5.94 |    4.91 |    0.00 |   10.00
tracking_rewards |  191.31 |    7.18 |  176.86 |  216.66
tracking_error |  0.0041 |  0.0039 |  0.0000 |  0.6490
optflow_error |  0.0030 |  0.1300 |  0.0000 | 30.7541
pixel_icoords |    0.19   -0.25 |    7.26    6.42 |  -16.77  -13.92 |   16.02   14.38
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.61 |    7.15 |  179.00 |  219.00
***** Episode 27590, Mean R = 197.0  Std R = 9.3  Min R = 178.4
PolicyLoss: -0.00292
Policy_Beta: 0.1
Policy_Entropy: 0.168
Policy_KL: 0.00

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.02    0.05 |    0.27    1.77 |   -0.57   -3.12 |    0.62    3.14
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    5.81 |    4.93 |    0.00 |   10.00
tracking_rewards |  190.86 |    6.92 |  177.68 |  219.11
tracking_error |  0.0041 |  0.0038 |  0.0000 |  0.5615
optflow_error |  0.0033 |  0.1284 |  0.0000 | 27.3681
pixel_icoords |    0.47   -0.83 |    6.71    6.62 |  -15.10  -15.33 |   16.03   16.50
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.14 |    6.94 |  179.00 |  221.00
***** Episode 27900, Mean R = 197.5  Std R = 6.5  Min R = 179.4
PolicyLoss: -0.0161
Policy_Beta: 0.1
Policy_Entropy: 0.171
Policy_KL: 0.000

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |   -0.04   -0.07 |    0.27    1.75 |   -0.64   -3.12 |    0.62    3.12
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    5.45 |    4.98 |    0.00 |   10.00
tracking_rewards |  191.34 |    7.36 |  177.58 |  220.95
tracking_error |  0.0038 |  0.0026 |  0.0000 |  0.0953
optflow_error |  0.0024 |  0.0363 |  0.0000 |  4.6141
pixel_icoords |   -0.92    0.29 |    6.79    6.92 |  -18.00  -14.88 |   17.34   14.86
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.62 |    7.39 |  179.00 |  224.00
***** Episode 28210, Mean R = 197.6  Std R = 7.2  Min R = 180.5
PolicyLoss: 0.00501
Policy_Beta: 0.1
Policy_Entropy: 0.172
Policy_KL: 0.000

optflow_error |  0.0033 |  0.1244 |  0.0000 | 24.4459
pixel_icoords |    0.00   -0.20 |    6.38    6.35 |  -15.13  -15.64 |   13.33   17.46
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  192.47 |    6.89 |  179.00 |  216.00
***** Episode 28520, Mean R = 198.5  Std R = 9.7  Min R = 177.8
PolicyLoss: 0.0037
Policy_Beta: 0.1
Policy_Entropy: 0.177
Policy_KL: 0.000665
Policy_SD: 0.855
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 5.52e+06
VF_0_ExplainedVarNew: 0.96
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.194


ADV1:  0.0004475068026477526 0.02798141588913055 0.2158495722974888 -0.24469564763086296
ADV2:  -0.0026933715870564178 0.7818387632095094 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.4974   3.4437  17.6021  63.4107  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0007   0.0032   1.2858   0.7421   0.3504
***** Episode 28551, Mean R = 197.8  Std R = 9.4  Min R = 179.1
PolicyLoss: -0.00148
Policy_Beta: 0.1
Policy_Entropy: 0.1

fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    6.23 |    4.85 |    0.00 |   10.00
tracking_rewards |  191.87 |    7.28 |  177.60 |  219.76
tracking_error |  0.0039 |  0.0029 |  0.0000 |  0.1498
optflow_error |  0.0025 |  0.0445 |  0.0000 |  7.0585
pixel_icoords |    0.78    0.18 |    6.84    6.87 |  -16.43  -16.98 |   16.73   16.15
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  194.05 |    7.35 |  180.00 |  223.00
***** Episode 28830, Mean R = 198.7  Std R = 7.4  Min R = 181.1
PolicyLoss: 8.3e-05
Policy_Beta: 0.1
Policy_Entropy: 0.168
Policy_KL: 0.000775
Policy_SD: 0.848
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 5.58e+06
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.19


ADV1:  -0.0027285238112933693 0.029989743024928302 0.20129254888380121 -0.2735957951168886
ADV2:  -0.0002238836524062487 0.8051428316630544 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.4541   2.1165  11.0898  63.4107  34.7737  21.3128
ValFu

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |   -0.01   -0.02 |    0.27    1.78 |   -0.66   -3.08 |    0.62    3.14
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    6.29 |    4.83 |    0.00 |   10.00
tracking_rewards |  190.94 |    7.23 |  177.24 |  212.76
tracking_error |  0.0040 |  0.0040 |  0.0000 |  0.3707
optflow_error |  0.0038 |  0.1390 |  0.0000 | 17.4271
pixel_icoords |   -0.32   -0.62 |    6.72    6.48 |  -16.44  -16.93 |   16.30   13.68
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.17 |    7.20 |  179.00 |  215.00
***** Episode 29140, Mean R = 196.5  Std R = 8.6  Min R = 179.5
PolicyLoss: -0.0117
Policy_Beta: 0.1
Policy_Entropy: 0.176
Policy_KL: 0.000

tracking_error |  0.0039 |  0.0035 |  0.0000 |  0.3981
optflow_error |  0.0031 |  0.1052 |  0.0000 | 19.1259
pixel_icoords |    0.14   -0.25 |    6.68    6.24 |  -14.76  -15.45 |   16.42   17.49
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.38 |    7.01 |  179.00 |  219.00
***** Episode 29450, Mean R = 198.9  Std R = 8.7  Min R = 176.9
PolicyLoss: 0.0143
Policy_Beta: 0.1
Policy_Entropy: 0.187
Policy_KL: 0.000964
Policy_SD: 0.845
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 5.7e+06
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.122


ADV1:  0.0003828948920415186 0.03187191042730174 0.2736626115318206 -0.299318514290605
ADV2:  -0.007201941183204699 0.7281009765365023 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.8623   2.5518  13.2094  63.4107  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0094   0.0048   0.0193   1.2858   0.7421   0.3504
***** Episode 29481, Mean R = 199.9  Std R = 7.7  Min R = 180.4
Polic

tracking_error |  0.0043 |  0.0038 |  0.0000 |  0.4482
optflow_error |  0.0030 |  0.1031 |  0.0000 | 21.7520
pixel_icoords |    0.00   -0.13 |    6.82    6.80 |  -15.82  -17.61 |   18.10   16.13
theta_cv |    0.05 |    0.04 |    0.00 |    0.22
steps    |  193.37 |    7.04 |  180.00 |  213.00
***** Episode 29760, Mean R = 196.6  Std R = 8.9  Min R = 181.3
PolicyLoss: -0.00514
Policy_Beta: 0.1
Policy_Entropy: 0.194
Policy_KL: 0.00141
Policy_SD: 0.849
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 5.76e+06
VF_0_ExplainedVarNew: 0.958
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 0.153


ADV1:  0.003168121251990548 0.026819148910901765 0.3011340396251493 -0.2200235032749391
ADV2:  -0.040188247269147714 0.7111606894285644 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.4381   1.5133   8.2228  63.4107  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0061   0.0032   0.0122   1.2858   0.7421   0.3504
***** Episode 29791, Mean R = 199.9  Std R = 8.5  Min R = 184.3
P

theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  194.00 |    6.88 |  180.00 |  217.00
***** Episode 30070, Mean R = 200.7  Std R = 9.1  Min R = 183.3
PolicyLoss: 0.0138
Policy_Beta: 0.1
Policy_Entropy: 0.19
Policy_KL: 0.00126
Policy_SD: 0.851
Policy_lr_mult: 1
Steps: 6.06e+03
TotalSteps: 5.82e+06
VF_0_ExplainedVarNew: 0.953
VF_0_ExplainedVarOld: 0.951
VF_0_Loss : 0.192


ADV1:  0.0026056023823680025 0.02791223477619322 0.2022733679738533 -0.3589441920826264
ADV2:  -0.0045229478391750995 0.6940876907534878 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.5999   2.4487  10.7339  63.4107  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0095   0.0036   0.0190   1.2858   0.7421   0.3504
***** Episode 30101, Mean R = 199.9  Std R = 8.7  Min R = 179.8
PolicyLoss: -0.0278
Policy_Beta: 0.1
Policy_Entropy: 0.191
Policy_KL: 0.00143
Policy_SD: 0.859
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 5.82e+06
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0

tracking_error |  0.0037 |  0.0033 |  0.0000 |  0.3514
optflow_error |  0.0029 |  0.0989 |  0.0000 | 16.4930
pixel_icoords |    0.21   -0.84 |    7.23    6.93 |  -16.41  -15.29 |   16.34   13.51
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.59 |    6.93 |  180.00 |  216.00
***** Episode 30380, Mean R = 198.4  Std R = 9.5  Min R = 181.3
PolicyLoss: -0.0131
Policy_Beta: 0.1
Policy_Entropy: 0.18
Policy_KL: 0.00108
Policy_SD: 0.857
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 5.88e+06
VF_0_ExplainedVarNew: 0.96
VF_0_ExplainedVarOld: 0.961
VF_0_Loss : 0.252


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68890964e-12] 3.063174122265867e-12
ADV1:  -0.0005574687613978228 0.028941166186064824 0.22396906164101094 -0.3155567112726845
ADV2:  0.01643276615541402 0.7519232855827345 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.0226   2.

tracking_error |  0.0039 |  0.0038 |  0.0000 |  0.5871
optflow_error |  0.0030 |  0.1361 |  0.0000 | 28.3354
pixel_icoords |   -0.07   -0.43 |    6.90    7.12 |  -17.04  -16.29 |   15.50   15.26
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.46 |    6.96 |  179.00 |  220.00
***** Episode 30690, Mean R = 201.0  Std R = 8.0  Min R = 190.4
PolicyLoss: 0.0147
Policy_Beta: 0.1
Policy_Entropy: 0.192
Policy_KL: 0.00109
Policy_SD: 0.848
Policy_lr_mult: 1
Steps: 6.06e+03
TotalSteps: 5.94e+06
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.166


ADV1:  0.002012425196496705 0.025111954732059242 0.16769209346580405 -0.2594325355760678
ADV2:  -0.0032412914694403896 0.7434113411451225 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.1659   1.6209   8.2672  66.6180  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0048   0.0015   0.0075   1.2858   0.7421   0.3504
***** Episode 30721, Mean R = 201.5  Std R = 7.7  Min R = 188.4
P

tracking_error |  0.0041 |  0.0042 |  0.0000 |  0.7433
optflow_error |  0.0029 |  0.1483 |  0.0000 | 35.6422
pixel_icoords |   -0.46    0.45 |    7.03    6.98 |  -17.16  -16.01 |   15.24   16.42
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  194.02 |    7.34 |  179.00 |  216.00
***** Episode 31000, Mean R = 197.6  Std R = 8.7  Min R = 183.5
PolicyLoss: -0.0231
Policy_Beta: 0.1
Policy_Entropy: 0.194
Policy_KL: 0.000918
Policy_SD: 0.846
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 6e+06
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.168


ADV1:  0.0009975352584453974 0.02831606757741862 0.21699500690361062 -0.218592697000497
ADV2:  -0.018919166281697136 0.7745563136007213 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.8613   1.2356   6.1875  66.6180  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0070   0.0029   0.0115   1.2858   0.7421   0.3504
***** Episode 31031, Mean R = 199.4  Std R = 7.8  Min R = 179.5
Poli

tracking_error |  0.0039 |  0.0033 |  0.0000 |  0.4493
optflow_error |  0.0028 |  0.1051 |  0.0000 | 21.8213
pixel_icoords |    0.14   -0.26 |    6.73    6.29 |  -15.96  -16.26 |   16.65   13.99
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  192.81 |    6.96 |  180.00 |  217.00
***** Episode 31310, Mean R = 197.7  Std R = 7.8  Min R = 180.4
PolicyLoss: -0.00801
Policy_Beta: 0.1
Policy_Entropy: 0.192
Policy_KL: 0.000727
Policy_SD: 0.843
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 6.06e+06
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.154


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68890964e-12] 3.063174122265867e-12
ADV1:  -0.005845141423300263 0.024136157776805545 0.22865805953552132 -0.20503608771443538
ADV2:  0.041678775750420304 0.7723047202517418 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  20.5786

hit_rewards |    7.39 |    4.39 |    0.00 |   10.00
tracking_rewards |  190.91 |    6.99 |  177.71 |  212.60
tracking_error |  0.0040 |  0.0044 |  0.0000 |  0.7610
optflow_error |  0.0031 |  0.1557 |  0.0000 | 36.2225
pixel_icoords |   -0.22    0.00 |    6.55    6.35 |  -15.19  -15.86 |   17.38   15.29
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.04 |    7.05 |  180.00 |  215.00
***** Episode 31620, Mean R = 197.7  Std R = 7.0  Min R = 181.9
PolicyLoss: -0.0198
Policy_Beta: 0.1
Policy_Entropy: 0.188
Policy_KL: 0.000573
Policy_SD: 0.833
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 6.12e+06
VF_0_ExplainedVarNew: 0.953
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.0718


ADV1:  -0.001617999814231422 0.030319538333001734 0.2298135340213776 -0.2869644236856725
ADV2:  -0.0008749891613162121 0.747336096785342 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.1273   3.2264  15.9042  66.6180  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0189

hit_rewards |    7.68 |    4.22 |    0.00 |   10.00
tracking_rewards |  191.41 |    6.81 |  176.11 |  213.56
tracking_error |  0.0040 |  0.0040 |  0.0000 |  0.6315
optflow_error |  0.0027 |  0.1306 |  0.0000 | 30.2922
pixel_icoords |   -0.25    0.36 |    6.71    6.70 |  -18.90  -16.54 |   15.20   15.55
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.48 |    6.81 |  179.00 |  215.00
***** Episode 31930, Mean R = 197.6  Std R = 9.9  Min R = 176.1
PolicyLoss: -0.00267
Policy_Beta: 0.1
Policy_Entropy: 0.197
Policy_KL: 0.000867
Policy_SD: 0.854
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 6.18e+06
VF_0_ExplainedVarNew: 0.954
VF_0_ExplainedVarOld: 0.954
VF_0_Loss : 0.151


ADV1:  -0.0004164058633398704 0.026155089476342576 0.15781881947991006 -0.22844235301017757
ADV2:  0.023469278824718186 0.7627448407994597 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.5172   3.1434  16.0827  66.6180  34.7737  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.00

tracking_error |  0.0036 |  0.0028 |  0.0000 |  0.0947
optflow_error |  0.0022 |  0.0283 |  0.0000 |  4.3282
pixel_icoords |    0.12   -0.70 |    6.80    6.86 |  -16.63  -17.17 |   16.12   14.20
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.22 |    6.85 |  180.00 |  219.00
***** Episode 32240, Mean R = 197.6  Std R = 7.3  Min R = 183.3
PolicyLoss: -0.0215
Policy_Beta: 0.1
Policy_Entropy: 0.201
Policy_KL: 0.0011
Policy_SD: 0.844
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 6.24e+06
VF_0_ExplainedVarNew: 0.957
VF_0_ExplainedVarOld: 0.959
VF_0_Loss : 0.123


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68890964e-12] 3.063174122265867e-12
ADV1:  0.004655277863588723 0.0302600273265254 0.2275356720943087 -0.2272247494472172
ADV2:  -0.011605777281229166 0.7631387621293894 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.4006   5.0832 

tracking_error |  0.0039 |  0.0028 |  0.0000 |  0.0635
optflow_error |  0.0020 |  0.0206 |  0.0000 |  2.8211
pixel_icoords |    0.76    0.13 |    6.59    6.80 |  -15.26  -15.71 |   16.07   16.08
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.02 |    7.06 |  180.00 |  219.00
***** Episode 32550, Mean R = 196.6  Std R = 9.9  Min R = 177.2
PolicyLoss: -0.00732
Policy_Beta: 0.1
Policy_Entropy: 0.198
Policy_KL: 0.000612
Policy_SD: 0.843
Policy_lr_mult: 1
Steps: 5.91e+03
TotalSteps: 6.3e+06
VF_0_ExplainedVarNew: 0.956
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 0.136


ADV1:  -0.0012067791525120922 0.02292109713426513 0.159948143362999 -0.19284978194662367
ADV2:  0.015640457420477004 0.7796926933154031 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  35.4753  20.8772  65.7177  66.6180  35.4753  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0061   0.0028   0.0134   1.2858   0.7421   0.3504
***** Episode 32581, Mean R = 198.0  Std R = 8.7  Min R = 178.7
P

tracking_error |  0.0041 |  0.0032 |  0.0000 |  0.2285
optflow_error |  0.0023 |  0.0624 |  0.0000 | 11.1782
pixel_icoords |   -0.14    0.79 |    6.86    6.89 |  -16.51  -15.31 |   15.16   17.62
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  192.93 |    7.24 |  180.00 |  222.00
***** Episode 32860, Mean R = 196.3  Std R = 6.8  Min R = 180.5
PolicyLoss: 0.0199
Policy_Beta: 0.1
Policy_Entropy: 0.202
Policy_KL: 0.000897
Policy_SD: 0.839
Policy_lr_mult: 1
Steps: 5.91e+03
TotalSteps: 6.36e+06
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.136


ADV1:  0.003395352989294066 0.023973892295592474 0.19477107975479024 -0.277158036317097
ADV2:  -0.02570905367215286 0.750008175952439 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   8.2112   5.2888  27.8416  66.6180  35.4753  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0066   0.0036   0.0153   1.2858   0.7421   0.3504
***** Episode 32891, Mean R = 201.5  Std R = 8.6  Min R = 184.6
Poli

optflow_error |  0.0024 |  0.0455 |  0.0000 |  7.2401
pixel_icoords |   -0.37    0.22 |    6.41    6.84 |  -16.56  -15.23 |   14.75   15.89
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.24 |    7.44 |  180.00 |  217.00
***** Episode 33170, Mean R = 199.0  Std R = 7.6  Min R = 180.5
PolicyLoss: -0.0132
Policy_Beta: 0.1
Policy_Entropy: 0.206
Policy_KL: 0.000777
Policy_SD: 0.846
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 6.42e+06
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.149


ADV1:  -0.003466898050527104 0.026091003152323537 0.25486912141372386 -0.25353350449324086
ADV2:  0.03223190417675888 0.7404225919219367 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.1025   2.4827   8.9621  66.6180  35.4753  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0042   0.0018   0.0087   1.2858   0.7421   0.3504
***** Episode 33201, Mean R = 198.6  Std R = 7.9  Min R = 181.5
PolicyLoss: -0.00643
Policy_Beta: 0.1
Policy_Entropy: 0

tracking_error |  0.0036 |  0.0027 |  0.0000 |  0.1823
optflow_error |  0.0023 |  0.0534 |  0.0000 |  8.6715
pixel_icoords |   -0.44   -0.89 |    6.56    7.00 |  -15.45  -19.10 |   17.29   17.39
theta_cv |    0.05 |    0.03 |    0.00 |    0.16
steps    |  193.25 |    7.12 |  180.00 |  216.00
***** Episode 33480, Mean R = 198.2  Std R = 8.2  Min R = 182.8
PolicyLoss: -0.0174
Policy_Beta: 0.1
Policy_Entropy: 0.198
Policy_KL: 0.00137
Policy_SD: 0.835
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 6.48e+06
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.127


ADV1:  -0.0029144808981986393 0.027381993462424998 0.17047448754310612 -0.4032466556383797
ADV2:  0.02747021860718634 0.7945492347270642 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  14.1450  15.0106  47.5593  66.6180  35.4753  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0017   0.0075   1.2858   0.7421   0.3504
***** Episode 33511, Mean R = 197.5  Std R = 8.0  Min R = 180.5
P

tracking_error |  0.0037 |  0.0032 |  0.0000 |  0.2710
optflow_error |  0.0026 |  0.0823 |  0.0000 | 12.9274
pixel_icoords |    0.11    0.05 |    6.65    7.13 |  -14.68  -16.42 |   15.06   18.97
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  192.91 |    7.42 |  180.00 |  217.00
***** Episode 33790, Mean R = 198.1  Std R = 7.6  Min R = 184.0
PolicyLoss: 0.017
Policy_Beta: 0.1
Policy_Entropy: 0.198
Policy_KL: 0.00056
Policy_SD: 0.841
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 6.54e+06
VF_0_ExplainedVarNew: 0.954
VF_0_ExplainedVarOld: 0.959
VF_0_Loss : 0.148


ADV1:  0.002073108043180407 0.030248751350431726 0.23384167622804586 -0.31017044543768646
ADV2:  -0.009570314942675476 0.7168832800652719 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.0648   3.9313  16.7156  66.6180  35.4753  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0065   0.0032   0.0114   1.2858   0.7421   0.3504
***** Episode 33821, Mean R = 201.4  Std R = 7.5  Min R = 190.5
Po

optflow_error |  0.0035 |  0.2181 |  0.0000 | 43.7929
pixel_icoords |   -0.12   -0.13 |    6.56    6.45 |  -18.28  -13.51 |   15.70   16.60
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.84 |    6.80 |  180.00 |  219.00
***** Episode 34100, Mean R = 197.2  Std R = 8.9  Min R = 182.1
PolicyLoss: -0.00281
Policy_Beta: 0.1
Policy_Entropy: 0.209
Policy_KL: 0.000891
Policy_SD: 0.839
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 6.6e+06
VF_0_ExplainedVarNew: 0.955
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.196


ADV1:  0.005707121899185014 0.029796929332595916 0.2868327677249909 -0.2929718751399819
ADV2:  -0.01782283143897422 0.7791967207623236 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.2502   1.9561   9.4253  66.6180  35.4753  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0068   0.0032   0.0153   1.2858   0.7421   0.3504
***** Episode 34131, Mean R = 199.2  Std R = 7.1  Min R = 188.0
PolicyLoss: -0.0025
Policy_Beta: 0.1
Policy_Entropy: 0.20

theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  192.33 |    6.41 |  180.00 |  217.00
***** Episode 34410, Mean R = 198.7  Std R = 6.9  Min R = 182.0
PolicyLoss: -0.0181
Policy_Beta: 0.1
Policy_Entropy: 0.21
Policy_KL: 0.000818
Policy_SD: 0.821
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 6.66e+06
VF_0_ExplainedVarNew: 0.945
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.071


ADV1:  0.0018445841822045523 0.028503085621058886 0.2893031009503847 -0.21289460910311775
ADV2:  -0.020683247968834126 0.7550451215260089 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   8.0311   4.7671  21.4060  66.6180  35.4753  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0127   0.0071   0.0271   1.2858   0.7421   0.3504
***** Episode 34441, Mean R = 200.3  Std R = 9.8  Min R = 182.7
PolicyLoss: 0.00752
Policy_Beta: 0.1
Policy_Entropy: 0.208
Policy_KL: 0.00156
Policy_SD: 0.833
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 6.66e+06
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVar

optflow_error |  0.0022 |  0.0314 |  0.0000 |  3.7543
pixel_icoords |   -0.03    0.10 |    6.83    6.56 |  -15.31  -18.13 |   15.67   15.81
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  192.96 |    6.85 |  180.00 |  218.00
***** Episode 34720, Mean R = 199.8  Std R = 9.6  Min R = 177.6
PolicyLoss: 0.0164
Policy_Beta: 0.1
Policy_Entropy: 0.208
Policy_KL: 0.000927
Policy_SD: 0.842
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 6.72e+06
VF_0_ExplainedVarNew: 0.944
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 0.217


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  -0.0018970067598439153 0.026376510247728538 0.15308559677444855 -0.2754288481529818
ADV2:  0.03357418776180621 0.7314097294990466 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0900   0.5675   2.6095  66.6180  37.8963  21.3128
ValFun  Grad

theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.70 |    7.09 |  180.00 |  218.00
***** Episode 35030, Mean R = 200.6  Std R = 6.1  Min R = 189.2
PolicyLoss: -0.00922
Policy_Beta: 0.1
Policy_Entropy: 0.212
Policy_KL: 0.000964
Policy_SD: 0.819
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 6.78e+06
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.0896


ADV1:  0.0016085379191959476 0.027334728411059387 0.20626956723820056 -0.3330785147023648
ADV2:  -0.003214829876554027 0.7513445228937133 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  15.6855  11.7971  40.8180  66.6180  37.8963  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0058   0.0030   0.0099   1.2858   0.7421   0.3504
***** Episode 35061, Mean R = 200.4  Std R = 6.5  Min R = 184.7
PolicyLoss: -0.00636
Policy_Beta: 0.1
Policy_Entropy: 0.214
Policy_KL: 0.000844
Policy_SD: 0.833
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 6.78e+06
VF_0_ExplainedVarNew: 0.969
VF_0_Explain

tracking_error |  0.0034 |  0.0025 |  0.0000 |  0.1154
optflow_error |  0.0020 |  0.0316 |  0.0000 |  5.2851
pixel_icoords |    0.03   -0.18 |    6.73    6.72 |  -14.69  -14.83 |   16.17   15.08
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.15 |    6.84 |  180.00 |  214.00
***** Episode 35340, Mean R = 200.5  Std R = 8.9  Min R = 178.5
PolicyLoss: -0.0167
Policy_Beta: 0.1
Policy_Entropy: 0.209
Policy_KL: 0.000723
Policy_SD: 0.844
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 6.84e+06
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.17


ADV1:  -0.0027665236955893195 0.025560277513027845 0.2124239350758491 -0.2469824164829229
ADV2:  0.02106212630895261 0.7453500402922075 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  11.4933   9.0269  30.8211  66.6180  37.8963  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0061   0.0033   0.0125   1.2858   0.7421   0.3504
***** Episode 35371, Mean R = 199.4  Std R = 8.0  Min R = 184.5
Po

optflow_error |  0.0025 |  0.1034 |  0.0000 | 22.7441
pixel_icoords |   -0.23   -0.20 |    6.67    6.37 |  -17.01  -15.10 |   15.65   17.44
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  192.36 |    7.14 |  180.00 |  214.00
***** Episode 35650, Mean R = 197.9  Std R = 9.2  Min R = 180.8
PolicyLoss: 0.0138
Policy_Beta: 0.1
Policy_Entropy: 0.216
Policy_KL: 0.000938
Policy_SD: 0.841
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 6.9e+06
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.177


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  0.003153875515273288 0.028103609145709457 0.22479670063965707 -0.24905420055938954
ADV2:  -0.0002359021672653572 0.7858093669953745 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  10.5427   9.0348  36.1876  66.6180  37.8963  21.3128
ValFun  Gra

optflow_error |  0.0022 |  0.0656 |  0.0000 | 15.0056
pixel_icoords |   -0.05    0.15 |    7.21    6.92 |  -15.86  -16.83 |   16.28   15.19
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.88 |    6.79 |  179.00 |  213.00
***** Episode 35960, Mean R = 202.0  Std R = 6.6  Min R = 184.2
PolicyLoss: 0.0067
Policy_Beta: 0.1
Policy_Entropy: 0.218
Policy_KL: 0.00129
Policy_SD: 0.832
Policy_lr_mult: 1
Steps: 6.04e+03
TotalSteps: 6.96e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.168


ADV1:  -0.0076337803099822054 0.025228422087923814 0.1756526830612536 -0.3160952042340045
ADV2:  0.09183648949397426 0.7542552041274471 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   9.9404   8.1028  33.5421  66.6180  37.8963  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0128   0.0085   0.0375   1.2858   0.7421   0.3504
***** Episode 35991, Mean R = 199.2  Std R = 6.5  Min R = 189.3
PolicyLoss: -0.0255
Policy_Beta: 0.1
Policy_Entropy: 0.221

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
a_f      |    0.00    0.03 |    0.27    1.76 |   -0.63   -3.14 |    0.66    3.11
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    8.26 |    3.79 |    0.00 |   10.00
tracking_rewards |  191.27 |    7.08 |  177.66 |  210.59
tracking_error |  0.0037 |  0.0030 |  0.0000 |  0.3837
optflow_error |  0.0024 |  0.0803 |  0.0000 | 18.2248
pixel_icoords |    0.06   -0.47 |    6.91    6.48 |  -15.48  -15.21 |   18.17   14.36
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.20 |    7.11 |  179.00 |  212.00
***** Episode 36270, Mean R = 198.4  Std R = 6.4  Min R = 179.6
PolicyLoss: -0.00389
Policy_Beta: 0.1
Policy_Entropy: 0.221
Policy_KL: 0.00

***** Episode 36580, Mean R = 196.5  Std R = 7.8  Min R = 179.1
PolicyLoss: -0.0193
Policy_Beta: 0.1
Policy_Entropy: 0.211
Policy_KL: 0.000903
Policy_SD: 0.832
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 7.08e+06
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.103


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  0.006072678365732262 0.02420053877178613 0.19508009485031264 -0.2632161038577816
ADV2:  -0.016386355378644798 0.7891840750917813 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.1645   4.9168  21.5357  66.6180  37.8963  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0060   0.0029   0.0146   1.2858   0.7421   0.3504
***** Episode 36611, Mean R = 198.1  Std R = 8.0  Min R = 178.7
PolicyLoss: -0.0136
Policy_Beta: 0.1
Policy_Entropy: 0.213
Policy_KL: 0.000965
Poli

optflow_error |  0.0029 |  0.1552 |  0.0000 | 32.3485
pixel_icoords |    0.28    0.27 |    6.96    6.25 |  -16.26  -14.61 |   15.63   13.85
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.75 |    7.08 |  180.00 |  219.00
***** Episode 36890, Mean R = 198.9  Std R = 9.9  Min R = 178.7
PolicyLoss: -0.0173
Policy_Beta: 0.1
Policy_Entropy: 0.218
Policy_KL: 0.000747
Policy_SD: 0.854
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 7.14e+06
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.213


ADV1:  0.0010189945556848884 0.028327471865504644 0.19646330764046127 -0.25705797405866826
ADV2:  0.009535141642088296 0.7578072406897858 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  15.9748   9.6468  35.5551  66.6180  37.8963  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0011   0.0050   1.2858   0.7421   0.3504
***** Episode 36921, Mean R = 198.0  Std R = 8.8  Min R = 180.0
PolicyLoss: -0.0187
Policy_Beta: 0.1
Policy_Entropy: 0

a_f      |    0.02    0.11 |    0.26    1.74 |   -0.56   -3.11 |    0.60    3.12
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    8.29 |    3.76 |    0.00 |   10.00
tracking_rewards |  191.29 |    7.21 |  177.87 |  213.64
tracking_error |  0.0040 |  0.0037 |  0.0000 |  0.6164
optflow_error |  0.0024 |  0.1230 |  0.0000 | 29.3932
pixel_icoords |    0.58    0.45 |    6.63    7.01 |  -14.38  -17.42 |   15.39   15.75
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.25 |    7.28 |  180.00 |  216.00
***** Episode 37200, Mean R = 202.2  Std R = 7.0  Min R = 189.9
PolicyLoss: 0.0205
Policy_Beta: 0.1
Policy_Entropy: 0.223
Policy_KL: 0.00129
Policy_SD: 0.832
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 7.2e+06
VF_0_ExplainedVarNew: 0.946
VF_0_Explain

optflow_error |  0.0022 |  0.0586 |  0.0000 | 11.8333
pixel_icoords |   -0.14   -0.10 |    6.50    6.75 |  -13.76  -15.34 |   15.80   15.60
theta_cv |    0.05 |    0.03 |    0.00 |    0.20
steps    |  192.62 |    7.78 |  179.00 |  214.00
***** Episode 37510, Mean R = 198.6  Std R = 7.8  Min R = 181.1
PolicyLoss: -0.0077
Policy_Beta: 0.1
Policy_Entropy: 0.221
Policy_KL: 0.000665
Policy_SD: 0.824
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 7.26e+06
VF_0_ExplainedVarNew: 0.949
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.204


ADV1:  0.0005453623467213935 0.02703315194262039 0.2386767153687097 -0.2928590689952538
ADV2:  -0.0003378081227911952 0.71736294807229 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.9538   1.7546   6.4500  66.6180  37.8963  21.3128
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0128   0.0060   0.0275   1.2858   0.7421   0.3504
***** Episode 37541, Mean R = 199.2  Std R = 9.0  Min R = 182.6
PolicyLoss: -0.00769
Policy_Beta: 0.1
Policy_Entropy: 0.2

optflow_error |  0.0026 |  0.0763 |  0.0000 | 11.2713
pixel_icoords |    0.29   -0.09 |    6.77    6.42 |  -17.25  -15.72 |   16.13   16.57
theta_cv |    0.05 |    0.03 |    0.00 |    0.20
steps    |  193.15 |    6.90 |  179.00 |  218.00
***** Episode 37820, Mean R = 199.5  Std R = 8.0  Min R = 185.5
PolicyLoss: -0.0193
Policy_Beta: 0.1
Policy_Entropy: 0.22
Policy_KL: 0.000957
Policy_SD: 0.816
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 7.32e+06
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.121


ADV1:  -0.008110948667087501 0.028724781293514978 0.2296027332223255 -0.26512709527406797
ADV2:  0.07118917063951141 0.7743619265321945 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   8.2114   5.6981  21.4798  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0159   0.0083   0.0338   1.2858   0.7421   0.3504
***** Episode 37851, Mean R = 197.6  Std R = 9.4  Min R = 185.2
PolicyLoss: -0.0243
Policy_Beta: 0.1
Policy_Entropy: 0.224
P

tracking_error |  0.0041 |  0.0028 |  0.0000 |  0.1153
optflow_error |  0.0022 |  0.0342 |  0.0000 |  5.3057
pixel_icoords |    0.88   -0.39 |    6.73    6.46 |  -15.53  -16.26 |   16.17   15.67
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.15 |    6.57 |  180.00 |  215.00
***** Episode 38130, Mean R = 199.2  Std R = 7.8  Min R = 179.7
PolicyLoss: -0.00716
Policy_Beta: 0.1
Policy_Entropy: 0.221
Policy_KL: 0.00205
Policy_SD: 0.808
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 7.38e+06
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.0962


ADV1:  0.0005075236100258938 0.03054850044855191 0.26863320564342086 -0.302068541853161
ADV2:  -0.008400691469340894 0.734582898352951 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.6969   4.1722  22.0358  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0057   0.0026   0.0126   1.2858   0.7421   0.3504
***** Episode 38161, Mean R = 200.3  Std R = 7.9  Min R = 182.2
P

tracking_error |  0.0041 |  0.0032 |  0.0000 |  0.1806
optflow_error |  0.0023 |  0.0588 |  0.0000 |  9.0004
pixel_icoords |    0.13   -0.26 |    6.94    6.97 |  -16.40  -17.84 |   15.37   15.11
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.85 |    6.95 |  180.00 |  216.00
***** Episode 38440, Mean R = 198.5  Std R = 6.4  Min R = 188.0
PolicyLoss: 0.0194
Policy_Beta: 0.1
Policy_Entropy: 0.234
Policy_KL: 0.000984
Policy_SD: 0.809
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 7.44e+06
VF_0_ExplainedVarNew: 0.962
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.146


ADV1:  0.0032251340846222837 0.031042459681166437 0.1979417071155286 -0.35763156794453815
ADV2:  -0.000810260957620378 0.7520967427258309 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.1088   1.6650   8.3065  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0060   0.0028   0.0116   1.2858   0.7421   0.3504
***** Episode 38471, Mean R = 198.4  Std R = 6.9  Min R = 184.2
Pol

tracking_error |  0.0040 |  0.0028 |  0.0000 |  0.1289
optflow_error |  0.0020 |  0.0302 |  0.0000 |  6.1927
pixel_icoords |   -0.54    0.52 |    6.52    6.55 |  -16.20  -14.73 |   15.94   15.63
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  192.58 |    7.01 |  179.00 |  214.00
***** Episode 38750, Mean R = 200.1  Std R = 8.3  Min R = 179.3
PolicyLoss: -0.00553
Policy_Beta: 0.1
Policy_Entropy: 0.233
Policy_KL: 0.000791
Policy_SD: 0.813
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 7.5e+06
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.192


ADV1:  -0.0024545296544060554 0.026075230676916923 0.16514533169104284 -0.2639814962095165
ADV2:  0.02920280344173736 0.7755472757607957 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  16.3855  12.6976  44.6920  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0012   0.0059   1.2858   0.7421   0.3504
***** Episode 38781, Mean R = 199.0  Std R = 7.8  Min R = 181.1


tracking_error |  0.0038 |  0.0027 |  0.0000 |  0.0689
optflow_error |  0.0018 |  0.0185 |  0.0000 |  2.6121
pixel_icoords |   -1.06    0.64 |    6.39    6.46 |  -17.04  -14.38 |   13.69   15.87
theta_cv |    0.05 |    0.03 |    0.00 |    0.21
steps    |  192.94 |    6.93 |  179.00 |  213.00
***** Episode 39060, Mean R = 202.2  Std R = 8.7  Min R = 188.6
PolicyLoss: -0.018
Policy_Beta: 0.1
Policy_Entropy: 0.236
Policy_KL: 0.00136
Policy_SD: 0.803
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 7.56e+06
VF_0_ExplainedVarNew: 0.92
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.195


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  -0.004325820384045559 0.029068097256382708 0.25960831642150883 -0.3796608234759977
ADV2:  0.03908111348341539 0.7288796888605095 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  14.0578  10.3

optflow_error |  0.0020 |  0.0286 |  0.0000 |  4.5801
pixel_icoords |   -0.44   -0.18 |    6.46    6.92 |  -16.51  -15.57 |   14.15   16.05
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.37 |    7.13 |  179.00 |  215.00
***** Episode 39370, Mean R = 200.3  Std R = 6.3  Min R = 189.1
PolicyLoss: -0.0184
Policy_Beta: 0.1
Policy_Entropy: 0.229
Policy_KL: 0.00147
Policy_SD: 0.8
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 7.62e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.122


ADV1:  -0.001532892621193912 0.025049780526369237 0.24340447792630338 -0.25201483212062736
ADV2:  0.004050176611245482 0.7310372157722647 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.1744   2.9341  13.9207  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0011   0.0053   1.2858   0.7421   0.3504
***** Episode 39401, Mean R = 198.0  Std R = 8.8  Min R = 178.1
PolicyLoss: 0.00529
Policy_Beta: 0.1
Policy_Entropy: 0.247
Po

tracking_error |  0.0041 |  0.0057 |  0.0000 |  0.9558
optflow_error |  0.0038 |  0.2447 |  0.0000 | 45.8640
pixel_icoords |   -0.16   -0.15 |    6.72    6.53 |  -14.98  -14.82 |   16.99   17.18
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.21 |    7.44 |  180.00 |  218.00
***** Episode 39680, Mean R = 198.1  Std R = 7.3  Min R = 179.6
PolicyLoss: -0.0204
Policy_Beta: 0.1
Policy_Entropy: 0.237
Policy_KL: 0.000949
Policy_SD: 0.797
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 7.68e+06
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.18


ADV1:  0.008056065606100993 0.032783878498579604 0.39593095651514 -0.22793915160884903
ADV2:  -0.05540073779677422 0.7332849523633943 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.7679   1.3655   5.6524  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0134   0.0064   0.0311   1.2858   0.7421   0.3504
***** Episode 39711, Mean R = 200.3  Std R = 6.7  Min R = 183.3
Poli

tracking_error |  0.0040 |  0.0038 |  0.0000 |  0.5195
optflow_error |  0.0026 |  0.1249 |  0.0000 | 24.9696
pixel_icoords |   -0.81   -0.09 |    6.96    6.64 |  -16.77  -15.46 |   15.05   14.49
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.67 |    6.70 |  180.00 |  212.00
***** Episode 39990, Mean R = 199.9  Std R = 7.6  Min R = 187.6
PolicyLoss: -0.00543
Policy_Beta: 0.1
Policy_Entropy: 0.229
Policy_KL: 0.000962
Policy_SD: 0.794
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 7.74e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.172


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  -0.00017458829489564397 0.027891911769104244 0.2795739054679871 -0.2674932379432396
ADV2:  0.00791128353554798 0.7280581365503589 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.8617   4

tracking_error |  0.0039 |  0.0028 |  0.0000 |  0.0633
optflow_error |  0.0019 |  0.0247 |  0.0000 |  3.2198
pixel_icoords |    0.08   -0.02 |    6.75    6.63 |  -14.87  -15.25 |   18.32   16.43
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.04 |    6.83 |  180.00 |  216.00
***** Episode 40300, Mean R = 198.3  Std R = 7.7  Min R = 181.6
PolicyLoss: -0.008
Policy_Beta: 0.1
Policy_Entropy: 0.24
Policy_KL: 0.00204
Policy_SD: 0.767
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 7.8e+06
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.105


ADV1:  0.000395393638774102 0.022040933122781325 0.18956935263772656 -0.22194423657441203
ADV2:  -0.005627428038811471 0.744464851615171 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.3518   4.7823  20.8288  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0094   0.0044   0.0176   1.2858   0.7421   0.3504
***** Episode 40331, Mean R = 200.0  Std R = 8.2  Min R = 184.1
Poli

optflow_error |  0.0022 |  0.0631 |  0.0000 | 13.9960
pixel_icoords |   -0.64   -0.02 |    6.88    6.75 |  -15.16  -18.16 |   17.93   14.78
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.86 |    7.01 |  179.00 |  213.00
***** Episode 40610, Mean R = 200.7  Std R = 7.5  Min R = 180.4
PolicyLoss: -0.032
Policy_Beta: 0.1
Policy_Entropy: 0.24
Policy_KL: 0.000618
Policy_SD: 0.809
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 7.86e+06
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.197


ADV1:  -5.283022529254503e-05 0.025971225682328705 0.26840004622936253 -0.245780560859851
ADV2:  -0.020833742068777156 0.702548384660113 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.7629   1.5370   7.4981  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0050   0.0015   0.0076   1.2858   0.7421   0.3504
***** Episode 40641, Mean R = 201.4  Std R = 7.0  Min R = 184.1
PolicyLoss: 0.0236
Policy_Beta: 0.1
Policy_Entropy: 0.235


optflow_error |  0.0022 |  0.0468 |  0.0000 |  6.5225
pixel_icoords |   -0.25    0.29 |    6.74    6.92 |  -16.49  -18.07 |   16.03   17.17
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.31 |    7.08 |  180.00 |  213.00
***** Episode 40920, Mean R = 199.1  Std R = 9.1  Min R = 181.7
PolicyLoss: -0.012
Policy_Beta: 0.1
Policy_Entropy: 0.231
Policy_KL: 0.00144
Policy_SD: 0.812
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 7.92e+06
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.151


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  0.0042037428469751115 0.021821881568118217 0.1613445257133223 -0.2129814058124204
ADV2:  -0.002128873514148293 0.7758459999866937 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.3337   1.2962   6.4361  66.6180  37.8963  21.4200
ValFun  Gradie

tracking_error |  0.0036 |  0.0027 |  0.0000 |  0.1685
optflow_error |  0.0022 |  0.0443 |  0.0000 |  8.0767
pixel_icoords |   -0.02   -0.06 |    6.46    6.58 |  -15.00  -16.46 |   15.79   15.92
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  192.60 |    6.51 |  180.00 |  212.00
***** Episode 41230, Mean R = 198.3  Std R = 8.1  Min R = 182.5
PolicyLoss: -0.0287
Policy_Beta: 0.1
Policy_Entropy: 0.231
Policy_KL: 0.00193
Policy_SD: 0.799
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 7.97e+06
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.129


ADV1:  0.0028056877590856 0.024495621396316745 0.1846889747553555 -0.2098707326468801
ADV2:  -0.027424508371521057 0.752288608054003 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.0601   3.1905  15.3735  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0088   0.0052   0.0195   1.2858   0.7421   0.3504
***** Episode 41261, Mean R = 199.3  Std R = 7.7  Min R = 181.1
Polic

optflow_error |  0.0019 |  0.0238 |  0.0000 |  4.2293
pixel_icoords |    0.51   -0.52 |    6.44    6.56 |  -15.84  -17.07 |   15.10   18.45
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  192.72 |    6.66 |  180.00 |  214.00
***** Episode 41540, Mean R = 199.5  Std R = 7.7  Min R = 182.6
PolicyLoss: -0.0307
Policy_Beta: 0.1
Policy_Entropy: 0.24
Policy_KL: 0.000927
Policy_SD: 0.796
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 8.03e+06
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.128


ADV1:  -0.0005276828822621762 0.0245283789816151 0.1986281678417463 -0.2178504664895086
ADV2:  0.009960075876201098 0.7435503718352163 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.9543   3.7242  16.7244  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0014   0.0054   1.2858   0.7421   0.3504
***** Episode 41571, Mean R = 199.4  Std R = 7.9  Min R = 181.0
PolicyLoss: -0.00916
Policy_Beta: 0.1
Policy_Entropy: 0.25

optflow_error |  0.0025 |  0.1064 |  0.0000 | 24.5020
pixel_icoords |    0.01    0.47 |    6.61    6.30 |  -15.83  -14.01 |   16.35   16.73
theta_cv |    0.05 |    0.03 |    0.00 |    0.20
steps    |  193.12 |    7.26 |  180.00 |  218.00
***** Episode 41850, Mean R = 201.5  Std R = 9.3  Min R = 186.0
PolicyLoss: -0.0198
Policy_Beta: 0.1
Policy_Entropy: 0.252
Policy_KL: 0.00162
Policy_SD: 0.818
Policy_lr_mult: 1
Steps: 6.06e+03
TotalSteps: 8.09e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.253


ADV1:  0.004312742240050732 0.02558089755371258 0.23365373431888692 -0.18940869429951618
ADV2:  -0.038447633623397974 0.7611758278682531 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.5046   2.2354  10.3561  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0129   0.0078   0.0298   1.2858   0.7421   0.3504
***** Episode 41881, Mean R = 198.4  Std R = 6.9  Min R = 180.3
PolicyLoss: 0.026
Policy_Beta: 0.1
Policy_Entropy: 0.244

tracking_error |  0.0037 |  0.0026 |  0.0000 |  0.2033
optflow_error |  0.0021 |  0.0476 |  0.0000 |  9.3541
pixel_icoords |   -0.47   -0.34 |    6.55    6.93 |  -16.88  -16.30 |   13.65   15.16
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.39 |    7.33 |  180.00 |  222.00
***** Episode 42160, Mean R = 200.5  Std R = 7.3  Min R = 181.0
PolicyLoss: -0.0122
Policy_Beta: 0.1
Policy_Entropy: 0.247
Policy_KL: 0.00115
Policy_SD: 0.797
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 8.15e+06
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.0961


ADV1:  -0.0019229485605563928 0.02741717263471895 0.2042392490308953 -0.42308957576751705
ADV2:  0.006331708371111003 0.7489968439346566 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.9302   2.2265   9.3494  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0049   0.0018   0.0095   1.2858   0.7421   0.3504
***** Episode 42191, Mean R = 200.3  Std R = 6.5  Min R = 188.7


tracking_error |  0.0038 |  0.0033 |  0.0000 |  0.5420
optflow_error |  0.0022 |  0.1060 |  0.0000 | 25.5994
pixel_icoords |   -0.09    0.18 |    6.78    6.52 |  -15.68  -16.63 |   15.36   14.56
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.12 |    7.08 |  179.00 |  215.00
***** Episode 42470, Mean R = 197.4  Std R = 7.9  Min R = 181.2
PolicyLoss: 0.00216
Policy_Beta: 0.1
Policy_Entropy: 0.244
Policy_KL: 0.00113
Policy_SD: 0.829
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 8.21e+06
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.174


ADV1:  0.006577825783743003 0.026914532767116025 0.2674980074077351 -0.1701202543235325
ADV2:  -0.0801502264207402 0.6991471681385064 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.3234   3.8544  13.4775  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0110   0.0066   0.0232   1.2858   0.7421   0.3504
***** Episode 42501, Mean R = 201.8  Std R = 7.5  Min R = 182.0
Poli

theta_cv |    0.05 |    0.03 |    0.00 |    0.16
steps    |  193.28 |    7.09 |  180.00 |  217.00
***** Episode 42780, Mean R = 201.4  Std R = 7.6  Min R = 186.2
PolicyLoss: -0.00623
Policy_Beta: 0.1
Policy_Entropy: 0.243
Policy_KL: 0.000373
Policy_SD: 0.824
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 8.27e+06
VF_0_ExplainedVarNew: 0.938
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.141


ADV1:  0.0032200310481107107 0.023807303779399065 0.2272908894150869 -0.22189786197465122
ADV2:  -0.0284475228850313 0.7282599554942021 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.0646   4.7660  22.5435  66.6180  37.8963  21.4200
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0089   0.0044   0.0173   1.2858   0.7421   0.3504
***** Episode 42811, Mean R = 200.2  Std R = 7.9  Min R = 188.6
PolicyLoss: -0.0019
Policy_Beta: 0.1
Policy_Entropy: 0.241
Policy_KL: 0.002
Policy_SD: 0.834
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 8.28e+06
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOl

optflow_error |  0.0019 |  0.0335 |  0.0000 |  6.3895
pixel_icoords |   -0.02    0.32 |    7.02    6.68 |  -16.79  -17.77 |   17.49   15.78
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.41 |    7.05 |  179.00 |  217.00
***** Episode 43090, Mean R = 198.6  Std R = 8.1  Min R = 180.4
PolicyLoss: 0.00277
Policy_Beta: 0.1
Policy_Entropy: 0.243
Policy_KL: 0.00721
Policy_SD: 0.825
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 8.33e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.155


ADV1:  0.005327432945380852 0.019275489915361755 0.30755482912063603 -0.1631805096112644
ADV2:  -0.0632874556320825 0.7413962046383612 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   9.5611   5.9816  29.1273  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0104   0.0059   0.0208   1.2858   0.7421   0.3504
***** Episode 43121, Mean R = 201.5  Std R = 6.5  Min R = 189.8
PolicyLoss: 0.0131
Policy_Beta: 0.1
Policy_Entropy: 0.236


tracking_error |  0.0037 |  0.0025 |  0.0000 |  0.0661
optflow_error |  0.0018 |  0.0204 |  0.0000 |  3.0293
pixel_icoords |    0.35    0.57 |    7.45    6.90 |  -17.02  -16.84 |   17.37   17.23
theta_cv |    0.05 |    0.03 |    0.00 |    0.20
steps    |  193.62 |    7.56 |  180.00 |  227.00
***** Episode 43400, Mean R = 199.8  Std R = 9.4  Min R = 177.1
PolicyLoss: 0.00739
Policy_Beta: 0.1
Policy_Entropy: 0.249
Policy_KL: 0.000636
Policy_SD: 0.827
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 8.39e+06
VF_0_ExplainedVarNew: 0.823
VF_0_ExplainedVarOld: 0.961
VF_0_Loss : 0.189


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  -0.0028155450064571187 0.024949783222499747 0.16708050878803016 -0.27588827764060553
ADV2:  0.031986019831773835 0.7914428767626539 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.5528

tracking_error |  0.0037 |  0.0033 |  0.0000 |  0.5170
optflow_error |  0.0022 |  0.1025 |  0.0000 | 24.5382
pixel_icoords |    0.70   -0.18 |    7.02    6.60 |  -14.43  -16.67 |   15.34   17.13
theta_cv |    0.05 |    0.03 |    0.00 |    0.20
steps    |  192.96 |    6.51 |  180.00 |  218.00
***** Episode 43710, Mean R = 200.0  Std R = 7.7  Min R = 181.6
PolicyLoss: -0.00648
Policy_Beta: 0.1
Policy_Entropy: 0.249
Policy_KL: 0.00107
Policy_SD: 0.815
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 8.45e+06
VF_0_ExplainedVarNew: 0.926
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.121


ADV1:  -0.0021147827642231747 0.021735615807245253 0.13164232969284062 -0.3222283907883766
ADV2:  0.03991958910994599 0.7528304556748779 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   9.9952   5.3340  25.1140  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0036   0.0019   0.0080   1.2858   0.7421   0.3504
***** Episode 43741, Mean R = 201.7  Std R = 8.5  Min R = 185.3
Pol

tracking_error |  0.0037 |  0.0026 |  0.0000 |  0.0415
optflow_error |  0.0017 |  0.0150 |  0.0000 |  1.8999
pixel_icoords |    0.04    0.23 |    6.55    7.08 |  -15.16  -15.72 |   14.77   16.49
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.52 |    7.15 |  180.00 |  220.00
***** Episode 44020, Mean R = 199.3  Std R = 7.5  Min R = 180.4
PolicyLoss: -0.0425
Policy_Beta: 0.1
Policy_Entropy: 0.259
Policy_KL: 0.00114
Policy_SD: 0.806
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 8.51e+06
VF_0_ExplainedVarNew: 0.911
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.13


ADV1:  0.0023425977164236228 0.025919695313373466 0.21782664554719333 -0.32217903137207027
ADV2:  -0.00010125652053926442 0.7543753963319804 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   8.9643   5.2214  22.3976  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0067   0.0039   0.0195   1.2858   0.7421   0.3504
***** Episode 44051, Mean R = 199.8  Std R = 7.1  Min R = 181.

tracking_error |  0.0036 |  0.0026 |  0.0000 |  0.1705
optflow_error |  0.0019 |  0.0392 |  0.0000 |  7.8195
pixel_icoords |    0.39   -0.50 |    7.20    6.74 |  -16.35  -15.69 |   15.41   16.07
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.45 |    6.92 |  180.00 |  217.00
***** Episode 44330, Mean R = 201.3  Std R = 8.5  Min R = 181.0
PolicyLoss: -0.0149
Policy_Beta: 0.1
Policy_Entropy: 0.257
Policy_KL: 0.00194
Policy_SD: 0.821
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 8.57e+06
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.215


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  0.0009769114343504773 0.024562414651490127 0.21373538251024682 -0.19764907801363213
ADV2:  -0.018309095496089306 0.7550845238281785 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.8214 

optflow_error |  0.0019 |  0.0316 |  0.0000 |  5.3432
pixel_icoords |   -0.37    0.23 |    6.78    6.59 |  -17.27  -15.76 |   16.98   15.98
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.72 |    7.32 |  179.00 |  225.00
***** Episode 44640, Mean R = 200.7  Std R = 6.5  Min R = 187.7
PolicyLoss: -0.0201
Policy_Beta: 0.1
Policy_Entropy: 0.262
Policy_KL: 0.00186
Policy_SD: 0.794
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 8.63e+06
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.122


ADV1:  -0.0006110210659579503 0.024794286183659097 0.2196438783994477 -0.20631166046333468
ADV2:  0.002379815579757551 0.7496466015749128 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.2992   2.4571  10.4367  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0015   0.0065   1.2858   0.7421   0.3504
***** Episode 44671, Mean R = 199.6  Std R = 7.5  Min R = 186.4
PolicyLoss: -0.000668
Policy_Beta: 0.1
Policy_Entropy: 

***** Episode 44950, Mean R = 203.6  Std R = 6.3  Min R = 192.8
PolicyLoss: 0.00712
Policy_Beta: 0.1
Policy_Entropy: 0.26
Policy_KL: 0.00186
Policy_SD: 0.808
Policy_lr_mult: 1
Steps: 6.06e+03
TotalSteps: 8.69e+06
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.164


ADV1:  -0.00536639854773613 0.02530458184411682 0.1773358879550052 -0.3320654382418954
ADV2:  0.056389911688608095 0.7061735368497125 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.5142   4.2637  14.4465  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0111   0.0077   0.0287   1.2858   0.7421   0.3504
***** Episode 44981, Mean R = 200.2  Std R = 8.9  Min R = 182.3
PolicyLoss: -0.0275
Policy_Beta: 0.1
Policy_Entropy: 0.251
Policy_KL: 0.000524
Policy_SD: 0.798
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 8.7e+06
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.151


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.136

optflow_error |  0.0019 |  0.0345 |  0.0000 |  4.8135
pixel_icoords |   -0.53   -0.07 |    6.39    6.99 |  -15.45  -16.43 |   16.47   17.36
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  192.81 |    7.32 |  180.00 |  217.00
***** Episode 45260, Mean R = 200.1  Std R = 7.8  Min R = 183.4
PolicyLoss: 0.0025
Policy_Beta: 0.1
Policy_Entropy: 0.252
Policy_KL: 0.000842
Policy_SD: 0.803
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 8.75e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.221


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  -0.0021077318664351687 0.020507140429197622 0.1628107531416192 -0.36378955841064453
ADV2:  0.02393795952318645 0.7348568898563359 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   8.3635   6.8458  23.9511  74.3871  37.8963  27.0569
ValFun  Grad

theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.76 |    6.76 |  180.00 |  216.00
***** Episode 45570, Mean R = 200.1  Std R = 7.5  Min R = 183.4
PolicyLoss: -0.00865
Policy_Beta: 0.1
Policy_Entropy: 0.253
Policy_KL: 0.00171
Policy_SD: 0.817
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 8.81e+06
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.173


ADV1:  -0.0018565346582023743 0.02279469873963318 0.19659819608945978 -0.2352854813610209
ADV2:  0.02344471844189686 0.7468097457950017 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  13.3083  15.6532  57.0895  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0012   0.0052   1.2858   0.7421   0.3504
***** Episode 45601, Mean R = 197.9  Std R = 7.0  Min R = 183.5
PolicyLoss: -0.0106
Policy_Beta: 0.1
Policy_Entropy: 0.258
Policy_KL: 0.000715
Policy_SD: 0.801
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 8.82e+06
VF_0_ExplainedVarNew: 0.96
VF_0_ExplainedVarO

***** Episode 45880, Mean R = 200.0  Std R = 7.0  Min R = 189.0
PolicyLoss: -0.00825
Policy_Beta: 0.1
Policy_Entropy: 0.255
Policy_KL: 0.00177
Policy_SD: 0.797
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 8.87e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.128


ADV1:  0.0010695249451422714 0.023936604057127452 0.27891843665789706 -0.1859678755515669
ADV2:  -0.02318501689183272 0.7464959433319096 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.3272   4.3389  22.5723  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0079   0.0031   0.0140   1.2858   0.7421   0.3504
***** Episode 45911, Mean R = 200.0  Std R = 7.7  Min R = 184.7
PolicyLoss: 0.0186
Policy_Beta: 0.1
Policy_Entropy: 0.261
Policy_KL: 0.00129
Policy_SD: 0.798
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 8.88e+06
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.123


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2

a_f      |    0.02    0.03 |    0.26    1.78 |   -0.60   -3.14 |    0.63    3.12
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    8.97 |    3.04 |    0.00 |   10.00
tracking_rewards |  191.13 |    6.72 |  177.61 |  216.63
tracking_error |  0.0035 |  0.0024 |  0.0000 |  0.0776
optflow_error |  0.0019 |  0.0270 |  0.0000 |  3.6820
pixel_icoords |    0.69    0.09 |    6.62    6.46 |  -15.98  -17.03 |   16.34   17.31
theta_cv |    0.05 |    0.03 |    0.00 |    0.16
steps    |  192.95 |    6.75 |  180.00 |  218.00
***** Episode 46190, Mean R = 199.1  Std R = 6.2  Min R = 185.0
PolicyLoss: 0.0116
Policy_Beta: 0.1
Policy_Entropy: 0.262
Policy_KL: 0.000903
Policy_SD: 0.805
Policy_lr_mult: 1
Steps: 5.95e+03
TotalSteps: 8.93e+06
VF_0_ExplainedVarNew: 0.962
VF_0_Expla

tracking_error |  0.0036 |  0.0032 |  0.0000 |  0.4722
optflow_error |  0.0022 |  0.0957 |  0.0000 | 22.1445
pixel_icoords |    0.10   -0.45 |    6.76    6.58 |  -17.36  -15.07 |   15.78   15.73
theta_cv |    0.05 |    0.03 |    0.00 |    0.20
steps    |  192.78 |    6.71 |  180.00 |  212.00
***** Episode 46500, Mean R = 203.4  Std R = 8.6  Min R = 187.2
PolicyLoss: -0.0102
Policy_Beta: 0.1
Policy_Entropy: 0.248
Policy_KL: 0.00149
Policy_SD: 0.81
Policy_lr_mult: 1
Steps: 6.06e+03
TotalSteps: 8.99e+06
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.211


ADV1:  -0.0031826357074727515 0.024322647330722265 0.13809720859750585 -0.334557458340079
ADV2:  0.057697479168512304 0.6791834899008907 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6893   1.2140   5.3548  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0068   0.0025   0.0115   1.2858   0.7421   0.3504
***** Episode 46531, Mean R = 202.3  Std R = 8.2  Min R = 183.4
Pol

***** Episode 46810, Mean R = 200.7  Std R = 6.5  Min R = 188.5
PolicyLoss: 0.0027
Policy_Beta: 0.1
Policy_Entropy: 0.254
Policy_KL: 0.00139
Policy_SD: 0.789
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 9.05e+06
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.0963


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  -0.003919385449533736 0.022386383034666255 0.17076918349879744 -0.20935714547642958
ADV2:  0.04336073961713971 0.7751671205536751 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  18.3406  15.4579  51.5745  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0071   0.0039   0.0147   1.2858   0.7421   0.3504
***** Episode 46841, Mean R = 199.7  Std R = 6.5  Min R = 182.8
PolicyLoss: -0.0215
Policy_Beta: 0.1
Policy_Entropy: 0.25
Policy_KL: 0.00429
Policy

optflow_error |  0.0019 |  0.0666 |  0.0000 | 15.9882
pixel_icoords |    0.46    0.46 |    6.55    6.87 |  -16.71  -17.96 |   15.87   16.98
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.20 |    7.24 |  179.00 |  215.00
***** Episode 47120, Mean R = 200.0  Std R = 6.3  Min R = 189.8
PolicyLoss: -0.029
Policy_Beta: 0.1
Policy_Entropy: 0.25
Policy_KL: 0.000791
Policy_SD: 0.799
Policy_lr_mult: 1
Steps: 5.94e+03
TotalSteps: 9.11e+06
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.158


ADV1:  -0.004067570760686699 0.02476755849568708 0.1628467866511727 -0.34105752131967443
ADV2:  0.03833302472071062 0.7286779264136086 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9961   2.2566  10.2420  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0049   0.0020   0.0106   1.2858   0.7421   0.3504
***** Episode 47151, Mean R = 200.2  Std R = 7.5  Min R = 183.5
PolicyLoss: -0.0155
Policy_Beta: 0.1
Policy_Entropy: 0.258


optflow_error |  0.0018 |  0.0224 |  0.0000 |  3.2584
pixel_icoords |   -0.20   -0.14 |    7.12    6.71 |  -16.26  -16.56 |   16.00   15.77
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.50 |    7.40 |  179.00 |  222.00
***** Episode 47430, Mean R = 199.0  Std R = 8.7  Min R = 183.7
PolicyLoss: -0.00756
Policy_Beta: 0.1
Policy_Entropy: 0.258
Policy_KL: 0.00176
Policy_SD: 0.812
Policy_lr_mult: 1
Steps: 5.96e+03
TotalSteps: 9.17e+06
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.154


ADV1:  0.0010839602143088757 0.022116933655221594 0.16851324141025548 -0.23474410048792987
ADV2:  0.007188333584627604 0.7359634610091211 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   8.2864   4.5730  18.7364  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0010   0.0046   1.2858   0.7421   0.3504
***** Episode 47461, Mean R = 201.3  Std R = 6.6  Min R = 189.6
PolicyLoss: -0.0207
Policy_Beta: 0.1
Policy_Entropy: 0

tracking_error |  0.0036 |  0.0035 |  0.0000 |  0.5382
optflow_error |  0.0025 |  0.1187 |  0.0000 | 25.9402
pixel_icoords |   -0.46    0.22 |    6.75    6.90 |  -15.37  -15.49 |   13.45   15.05
theta_cv |    0.05 |    0.03 |    0.00 |    0.21
steps    |  193.81 |    6.88 |  179.00 |  213.00
***** Episode 47740, Mean R = 200.7  Std R = 9.4  Min R = 180.6
PolicyLoss: 0.00358
Policy_Beta: 0.1
Policy_Entropy: 0.249
Policy_KL: 0.00577
Policy_SD: 0.81
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 9.23e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.234


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  0.0042405948268236255 0.020620758776406265 0.23998260037878727 -0.20367997960934292
ADV2:  -0.043640178170806114 0.6700996882038013 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.5161  

optflow_error |  0.0019 |  0.0346 |  0.0000 |  6.1557
pixel_icoords |    0.63    0.53 |    6.82    7.18 |  -16.75  -17.27 |   16.73   17.46
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.99 |    7.26 |  180.00 |  222.00
***** Episode 48050, Mean R = 201.3  Std R = 7.6  Min R = 186.1
PolicyLoss: -0.0292
Policy_Beta: 0.1
Policy_Entropy: 0.263
Policy_KL: 0.00123
Policy_SD: 0.797
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 9.29e+06
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.174


ADV1:  0.0002491119155246567 0.020154408653662716 0.24376014259023981 -0.23098684227059835
ADV2:  -0.012549412172651342 0.6820416228388734 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   8.1846   4.5436  19.8584  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0064   0.0027   0.0127   1.2858   0.7421   0.3504
***** Episode 48081, Mean R = 203.7  Std R = 8.4  Min R = 184.1
PolicyLoss: 0.0106
Policy_Beta: 0.1
Policy_Entropy: 0.258


***** Episode 48360, Mean R = 200.8  Std R = 8.9  Min R = 184.0
PolicyLoss: -0.0285
Policy_Beta: 0.1
Policy_Entropy: 0.257
Policy_KL: 0.00143
Policy_SD: 0.81
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 9.35e+06
VF_0_ExplainedVarNew: 0.906
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.203


ADV1:  -0.0017592056757692758 0.023099844899480845 0.2278700099215898 -0.1621688698942974
ADV2:  0.004248722731979715 0.7844319075286523 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  14.9304  13.8020  44.6483  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0010   0.0051   1.2858   0.7421   0.3504
***** Episode 48391, Mean R = 198.6  Std R = 6.0  Min R = 180.5
PolicyLoss: 0.000361
Policy_Beta: 0.1
Policy_Entropy: 0.257
Policy_KL: 0.00121
Policy_SD: 0.786
Policy_lr_mult: 1
Steps: 5.93e+03
TotalSteps: 9.36e+06
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.0754


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 

optflow_error |  0.0025 |  0.1651 |  0.0000 | 39.7194
pixel_icoords |   -0.63    0.01 |    6.48    7.03 |  -15.13  -18.45 |   16.59   16.63
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.03 |    6.69 |  181.00 |  213.00
***** Episode 48670, Mean R = 202.9  Std R = 6.6  Min R = 189.6
PolicyLoss: 0.0058
Policy_Beta: 0.1
Policy_Entropy: 0.257
Policy_KL: 0.00127
Policy_SD: 0.789
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 9.41e+06
VF_0_ExplainedVarNew: 0.952
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.196


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  -0.005137159035249008 0.02406147931780329 0.16469513177871709 -0.2669784118619506
ADV2:  0.06727422986453817 0.7426524382179414 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  11.8420   7.2092  34.8630  74.3871  37.8963  27.0569
ValFun  Gradien

w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    9.00 |    3.00 |    0.00 |   10.00
tracking_rewards |  191.61 |    7.03 |  178.12 |  217.64
tracking_error |  0.0037 |  0.0025 |  0.0000 |  0.1175
optflow_error |  0.0019 |  0.0369 |  0.0000 |  5.4325
pixel_icoords |   -0.71   -0.60 |    6.61    6.74 |  -15.56  -16.79 |   15.11   15.10
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.41 |    7.02 |  180.00 |  220.00
***** Episode 48980, Mean R = 199.6  Std R = 6.4  Min R = 183.6
PolicyLoss: -0.0107
Policy_Beta: 0.1
Policy_Entropy: 0.262
Policy_KL: 0.00127
Policy_SD: 0.792
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 9.47e+06
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.153


ADV1:  -0.000353518548237537 0.025242077725

tracking_error |  0.0040 |  0.0063 |  0.0000 |  1.0647
optflow_error |  0.0034 |  0.2718 |  0.0000 | 51.1787
pixel_icoords |   -0.30    0.19 |    6.12    6.70 |  -15.34  -16.13 |   13.10   13.73
theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  192.82 |    6.62 |  181.00 |  219.00
***** Episode 49290, Mean R = 201.4  Std R = 6.7  Min R = 189.6
PolicyLoss: -0.0247
Policy_Beta: 0.1
Policy_Entropy: 0.269
Policy_KL: 0.00102
Policy_SD: 0.777
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 9.53e+06
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.149


ADV1:  0.0006239570393529361 0.02032752732637136 0.1671348143228486 -0.215932126001803
ADV2:  -0.009154847506266809 0.6886721942607614 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   8.8392   3.8315  19.2124  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0010   0.0042   1.2858   0.7421   0.3504
***** Episode 49321, Mean R = 202.3  Std R = 7.5  Min R = 182.1
Policy

a_f      |   -0.02   -0.09 |    0.27    1.72 |   -0.63   -3.13 |    0.55    3.13
w_f      |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00 |    0.00    0.00    0.00
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
hit_rewards |    9.19 |    2.72 |    0.00 |   10.00
tracking_rewards |  191.49 |    7.05 |  177.56 |  215.56
tracking_error |  0.0038 |  0.0026 |  0.0000 |  0.0846
optflow_error |  0.0018 |  0.0268 |  0.0000 |  3.9620
pixel_icoords |   -0.40   -0.20 |    6.70    6.85 |  -16.64  -17.16 |   15.51   14.94
theta_cv |    0.05 |    0.03 |    0.00 |    0.16
steps    |  193.30 |    7.08 |  179.00 |  218.00
***** Episode 49600, Mean R = 200.3  Std R = 7.3  Min R = 186.8
PolicyLoss: -0.0121
Policy_Beta: 0.1
Policy_Entropy: 0.265
Policy_KL: 0.00203
Policy_SD: 0.76
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 9.59e+06
VF_0_ExplainedVarNew: 0.972
VF_0_Explai

optflow_error |  0.0023 |  0.0695 |  0.0000 | 13.2885
pixel_icoords |   -0.30   -0.23 |    6.89    6.26 |  -16.72  -15.12 |   17.13   15.96
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.30 |    6.66 |  180.00 |  213.00
***** Episode 49910, Mean R = 199.9  Std R = 8.2  Min R = 184.2
PolicyLoss: 0.00764
Policy_Beta: 0.1
Policy_Entropy: 0.279
Policy_KL: 0.00129
Policy_SD: 0.769
Policy_lr_mult: 1
Steps: 6.00e+03
TotalSteps: 9.65e+06
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.959
VF_0_Loss : 0.0933


ADV1:  0.002844544477149979 0.022808836132916302 0.21282903045045043 -0.21968681538749185
ADV2:  -0.0006200695589857485 0.8086656568415916 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  16.0491  10.1444  41.8331  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0040   0.0015   0.0068   1.2858   0.7421   0.3504
***** Episode 49941, Mean R = 198.6  Std R = 5.4  Min R = 188.4
PolicyLoss: -0.00983
Policy_Beta: 0.1
Policy_Entropy:

***** Episode 50220, Mean R = 201.8  Std R = 7.1  Min R = 187.6
PolicyLoss: -0.00529
Policy_Beta: 0.1
Policy_Entropy: 0.264
Policy_KL: 0.00402
Policy_SD: 0.8
Policy_lr_mult: 1
Steps: 6.05e+03
TotalSteps: 9.71e+06
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.228


ADV1:  0.006585334698759208 0.023116122364617065 0.23013177594010992 -0.1455993946942441
ADV2:  -0.09440641419827368 0.6791348342863753 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.4583   4.0275  19.3968  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0155   0.0076   0.0277   1.2858   0.7421   0.3504
***** Episode 50251, Mean R = 202.6  Std R = 8.0  Min R = 189.6
PolicyLoss: 0.0337
Policy_Beta: 0.1
Policy_Entropy: 0.28
Policy_KL: 0.00172
Policy_SD: 0.787
Policy_lr_mult: 1
Steps: 6.03e+03
TotalSteps: 9.72e+06
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.215


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.136

theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  193.16 |    6.86 |  179.00 |  219.00
***** Episode 50530, Mean R = 201.7  Std R = 5.2  Min R = 191.8
PolicyLoss: 0.00206
Policy_Beta: 0.1
Policy_Entropy: 0.271
Policy_KL: 0.0011
Policy_SD: 0.76
Policy_lr_mult: 1
Steps: 6.00e+03
TotalSteps: 9.77e+06
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.122


ADV1:  -0.00360536700924167 0.020928251982477157 0.12358774542808537 -0.20663046863689127
ADV2:  0.04286945680579911 0.776368182002715 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  13.5806   6.5675  31.4201  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0062   0.0030   0.0116   1.2858   0.7421   0.3504
***** Episode 50561, Mean R = 199.9  Std R = 5.3  Min R = 192.1
PolicyLoss: -0.0303
Policy_Beta: 0.1
Policy_Entropy: 0.263
Policy_KL: 0.00204
Policy_SD: 0.765
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 9.78e+06
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 

***** Episode 50840, Mean R = 200.8  Std R = 6.3  Min R = 189.5
PolicyLoss: -0.0372
Policy_Beta: 0.1
Policy_Entropy: 0.272
Policy_KL: 0.00127
Policy_SD: 0.793
Policy_lr_mult: 1
Steps: 5.98e+03
TotalSteps: 9.83e+06
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.125


ADV1:  -0.0025587782584273644 0.024945319981365552 0.18180393717014176 -0.31266219806236795
ADV2:  0.024713846246825117 0.7404838854345963 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.0286   1.2450   6.3948  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0021   0.0083   1.2858   0.7421   0.3504
***** Episode 50871, Mean R = 201.1  Std R = 8.9  Min R = 178.1
PolicyLoss: -0.00901
Policy_Beta: 0.1
Policy_Entropy: 0.27
Policy_KL: 0.00158
Policy_SD: 0.791
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 9.84e+06
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.17


ADV1:  0.0005178815526882873 0.021851591819863955 0.1882721700296553

optflow_error |  0.0018 |  0.0278 |  0.0000 |  4.3673
pixel_icoords |    0.31    0.24 |    6.77    6.68 |  -15.82  -15.46 |   16.69   15.95
theta_cv |    0.05 |    0.03 |    0.00 |    0.17
steps    |  193.30 |    7.30 |  180.00 |  218.00
***** Episode 51150, Mean R = 201.2  Std R = 7.8  Min R = 181.3
PolicyLoss: -0.00251
Policy_Beta: 0.1
Policy_Entropy: 0.263
Policy_KL: 0.0011
Policy_SD: 0.777
Policy_lr_mult: 1
Steps: 6.01e+03
TotalSteps: 9.89e+06
VF_0_ExplainedVarNew: 0.948
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.118


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  0.0026531462164187457 0.023154397924403426 0.22987331342131456 -0.22410066127777095
ADV2:  -0.01670175489092954 0.7116529192252075 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   9.3997   6.3037  22.6938  74.3871  37.8963  27.0569
ValFun  Gra

***** Episode 51460, Mean R = 201.2  Std R = 6.5  Min R = 183.8
PolicyLoss: -0.011
Policy_Beta: 0.1
Policy_Entropy: 0.266
Policy_KL: 0.00167
Policy_SD: 0.762
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 9.95e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.0959


ADV1:  0.0010032935206074312 0.0194760166333884 0.18856623319732901 -0.1818031255813562
ADV2:  -0.011813603434551916 0.712502866482513 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.4024   4.5869  21.9006  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0060   0.0032   0.0135   1.2858   0.7421   0.3504
***** Episode 51491, Mean R = 202.4  Std R = 7.3  Min R = 189.8
PolicyLoss: -0.00193
Policy_Beta: 0.1
Policy_Entropy: 0.271
Policy_KL: 0.00175
Policy_SD: 0.785
Policy_lr_mult: 1
Steps: 6.02e+03
TotalSteps: 9.96e+06
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.214


ADV1:  -0.004647674619166811 0.02362044532459734 0.24122658093033789 -0

***** Episode 51770, Mean R = 200.3  Std R = 8.4  Min R = 180.3
PolicyLoss: -0.0162
Policy_Beta: 0.1
Policy_Entropy: 0.271
Policy_KL: 0.00121
Policy_SD: 0.775
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 1e+07
VF_0_ExplainedVarNew: 0.941
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.217


ADV1:  0.005425096355142685 0.023830741028261285 0.20556737621848953 -0.17808641962162675
ADV2:  -0.05620864229429729 0.7236345247382728 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.7454   5.4895  25.6829  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0097   0.0053   0.0208   1.2858   0.7421   0.3504
***** Episode 51801, Mean R = 200.3  Std R = 6.9  Min R = 181.6
PolicyLoss: 0.0144
Policy_Beta: 0.1
Policy_Entropy: 0.265
Policy_KL: 0.00149
Policy_SD: 0.792
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 1e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.176


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e

optflow_error |  0.0018 |  0.0314 |  0.0000 |  5.9327
pixel_icoords |   -0.26    0.54 |    6.70    6.81 |  -15.60  -14.73 |   16.50   14.66
theta_cv |    0.05 |    0.03 |    0.00 |    0.19
steps    |  193.21 |    6.39 |  180.00 |  218.00
***** Episode 52080, Mean R = 200.6  Std R = 6.0  Min R = 189.6
PolicyLoss: 0.0192
Policy_Beta: 0.1
Policy_Entropy: 0.273
Policy_KL: 0.00298
Policy_SD: 0.782
Policy_lr_mult: 1
Steps: 5.97e+03
TotalSteps: 1.01e+07
VF_0_ExplainedVarNew: 0.739
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.166


Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
ADV1:  -0.005087468106651088 0.031110667494028937 0.2560850873730609 -0.19354741912467543
ADV2:  0.006349769541531566 0.8116573564982205 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.5887   4.1400  17.7240  74.3871  37.8963  27.0569
ValFun  Gradi

theta_cv |    0.05 |    0.03 |    0.00 |    0.18
steps    |  192.55 |    6.70 |  180.00 |  216.00
***** Episode 52390, Mean R = 201.2  Std R = 7.6  Min R = 189.9
PolicyLoss: -0.0062
Policy_Beta: 0.1
Policy_Entropy: 0.267
Policy_KL: 0.00229
Policy_SD: 0.77
Policy_lr_mult: 1
Steps: 6e+03
TotalSteps: 1.01e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.197


ADV1:  -0.0019338457122239612 0.02320826018194444 0.2207427076659525 -0.2657945268069487
ADV2:  0.03055560831924805 0.6796224057493573 3.0 -3.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.4522   2.6086  14.5330  74.3871  37.8963  27.0569
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0045   0.0020   0.0078   1.2858   0.7421   0.3504
***** Episode 52421, Mean R = 200.6  Std R = 9.5  Min R = 179.1
PolicyLoss: -0.0149
Policy_Beta: 0.1
Policy_Entropy: 0.27
Policy_KL: 0.00126
Policy_SD: 0.788
Policy_lr_mult: 1
Steps: 5.99e+03
TotalSteps: 1.01e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.97

KeyboardInterrupt: 

In [7]:
# Save the policy parameters, the value-function parameters and the RL
# statistics history, all under the same base name.
fname = "Optimize-navp=4_Spiral"
policy.save_params(fname)
value_function.save_params(fname)
env.rl_stats.save_history(fname)

# Test Policy


In [6]:

# Batch evaluation of the agent with test_mode enabled.
# NOTE(review): the positional 5000 is presumably the number of test episodes
# and print_every the reporting interval -- confirm against Env.test_policy_batch.
env.test_policy_batch(
    agent,
    5000,
    print_every=100,
    test_mode=True,
)

worked 1
Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
i (et): 100  (             311)
Cumulative Stats (mean,std,max,argmax)
thrust   |3409.71 |2732.88 |  0.00 |6936.72 |    25
 
Final Stats (mean,std,min,max)
hit_reward |     9.6 |     2.0 |     0.0 |    10.0
hit_100cm |    1.00 |    0.00 |    1.00 |    1.00
hit_50cm |    0.96 |    0.20 |    0.00 |    1.00
norm_vf  |6823.533 | 144.629 |6324.701 |7022.506
norm_rf  |     0.3 |     0.1 |     0.0 |     0.7
position | 30498.2   -37.3 50927.6 |  1050.6  3087.8  2998.8 | 28270.1 -5561.9 45378.6 | 33419.8  5528.0 55829.9
velocity |-2790.352 -39.121  -8.546 | 157.178 817.468 746.337 |-3004.583-1657.697-1427.433 |-2333.1671717.6491484.537
fuel     | 14.68 |  2.15 | 10.02 | 21.85
attitude_321 |  -0.072   0.017  -0.115 |   2.903   0.253   1.790 |  -3.139  -0.522  -3.104 |   3.134   0

Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
i (et): 900  (             311)
Cumulative Stats (mean,std,max,argmax)
thrust   |3415.85 |2744.85 |  0.00 |6936.72 |    25
 
Final Stats (mean,std,min,max)
hit_reward |     9.6 |     1.9 |     0.0 |    10.0
hit_100cm |    1.00 |    0.00 |    1.00 |    1.00
hit_50cm |    0.96 |    0.19 |    0.00 |    1.00
norm_vf  |6827.586 | 155.364 |6132.101 |7022.506
norm_rf  |     0.3 |     0.1 |     0.0 |     0.7
position | 30454.9    -1.5 50608.3 |  1072.1  3022.3  3072.1 | 27911.2 -5625.2 44696.0 | 34592.3  5528.0 56264.3
velocity |-2798.826  17.152  46.556 | 168.051 772.342 761.419 |-3025.028-1729.711-1660.080 |-2069.5371717.6491805.183
fuel     | 14.75 |  2.15 |  9.76 | 23.43
attitude_321 |   0.010   0.000  -0.032 |   2.920   0.261   1.779 |  -3.139  -0.640  -3.131 |   3.141   0.589   3.

Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
i (et): 1700  (             305)
Cumulative Stats (mean,std,max,argmax)
thrust   |3412.73 |2747.71 |  0.00 |6936.72 |    25
 
Final Stats (mean,std,min,max)
hit_reward |     9.6 |     1.9 |     0.0 |    10.0
hit_100cm |    1.00 |    0.00 |    1.00 |    1.00
hit_50cm |    0.96 |    0.19 |    0.00 |    1.00
norm_vf  |6821.089 | 159.207 |6132.101 |7022.506
norm_rf  |     0.3 |     0.1 |     0.0 |     0.9
position | 30502.9   118.0 50471.9 |  1077.3  3118.4  3087.5 | 27911.2 -5625.2 44696.0 | 34592.3  5699.9 56395.3
velocity |-2794.761  34.635  41.991 | 171.746 779.953 768.202 |-3025.028-1773.313-1725.129 |-2069.5371766.2741805.183
fuel     | 14.77 |  2.20 |  8.67 | 32.03
attitude_321 |   0.045   0.004  -0.021 |   2.917   0.264   1.778 |  -3.140  -0.673  -3.139 |   3.141   0.589   3

Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.44545855e-12 2.51893314e-13 2.68893069e-12] 3.063192601926073e-12
i (et): 2500  (             311)
Cumulative Stats (mean,std,max,argmax)
thrust   |3410.90 |2750.14 |  0.00 |6936.72 |  2355
 
Final Stats (mean,std,min,max)
hit_reward |     9.6 |     1.9 |     0.0 |    10.0
hit_100cm |    1.00 |    0.02 |    0.00 |    1.00
hit_50cm |    0.96 |    0.19 |    0.00 |    1.00
norm_vf  |6822.357 | 156.973 |6132.101 |7022.506
norm_rf  |     0.3 |     0.1 |     0.0 |     1.1
position | 30504.8    48.1 50432.9 |  1074.5  3098.7  3118.8 | 27911.2 -5625.2 44417.0 | 34592.3  5816.0 56395.3
velocity |-2796.119  26.254  48.851 | 168.688 771.353 772.372 |-3025.028-1773.313-1780.612 |-2069.5371799.5671805.183
fuel     | 14.77 |  2.22 |  8.67 | 32.03
attitude_321 |   0.017   0.002  -0.020 |   2.919   0.266   1.788 |  -3.141  -0.673  -3.140 |   3.141   0.652   3

i (et): 3300  (             316)
Cumulative Stats (mean,std,max,argmax)
thrust   |3405.64 |2752.59 |  0.00 |6936.72 |  2355
 
Final Stats (mean,std,min,max)
hit_reward |     9.6 |     2.0 |     0.0 |    10.0
hit_100cm |    1.00 |    0.02 |    0.00 |    1.00
hit_50cm |    0.96 |    0.20 |    0.00 |    1.00
norm_vf  |6818.643 | 158.287 |6132.101 |7025.816
norm_rf  |     0.3 |     0.1 |     0.0 |     1.1
position | 30517.8    33.5 50423.0 |  1077.7  3111.7  3122.3 | 27911.2 -5777.7 44417.0 | 34592.3  5816.0 56636.0
velocity |-2791.885  21.122  51.493 | 170.795 781.911 776.466 |-3025.028-1816.281-1780.612 |-2069.5371825.1861815.761
fuel     | 14.76 |  2.18 |  8.67 | 32.03
attitude_321 |   0.022   0.002  -0.032 |   2.916   0.267   1.780 |  -3.141  -0.673  -3.140 |   3.141   0.652   3.140
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.68445941

i (et): 4100  (             316)
Cumulative Stats (mean,std,max,argmax)
thrust   |3405.94 |2754.56 |  0.00 |6936.72 |  2355
 
Final Stats (mean,std,min,max)
hit_reward |     9.6 |     1.9 |     0.0 |    10.0
hit_100cm |    1.00 |    0.02 |    0.00 |    1.00
hit_50cm |    0.96 |    0.19 |    0.00 |    1.00
norm_vf  |6818.635 | 158.085 |6102.223 |7025.816
norm_rf  |     0.3 |     0.1 |     0.0 |     1.1
position | 30530.8    60.0 50406.3 |  1080.4  3093.1  3123.3 | 27911.2 -5961.9 44402.3 | 34592.3  5816.0 56636.0
velocity |-2791.338  24.566  44.636 | 170.938 779.144 781.477 |-3025.028-1816.281-1780.612 |-2069.5371825.1861815.761
fuel     | 14.78 |  2.19 |  8.67 | 32.03
attitude_321 |   0.016   0.004  -0.014 |   2.917   0.270   1.786 |  -3.141  -0.673  -3.140 |   3.141   0.674   3.140
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.68445941

Dynamics: Max Disturbance (m/s^2):  [1.59651751e-12 3.03340678e-13 2.13668871e-12] 2.6844594154973157e-12
Dynamics: Max Disturbance (m/s^2):  [1.46317193e-12 2.51893314e-13 2.68893069e-12] 3.071590889634712e-12
i (et): 4900  (             311)
Cumulative Stats (mean,std,max,argmax)
thrust   |3404.48 |2754.96 |  0.00 |6936.72 |  2355
 
Final Stats (mean,std,min,max)
hit_reward |     9.6 |     1.9 |     0.0 |    10.0
hit_100cm |    1.00 |    0.02 |    0.00 |    1.00
hit_50cm |    0.96 |    0.19 |    0.00 |    1.00
norm_vf  |6818.247 | 157.856 |6102.223 |7025.816
norm_rf  |     0.3 |     0.1 |     0.0 |     1.5
position | 30531.5    18.4 50427.4 |  1081.0  3103.8  3137.4 | 27911.2 -5961.9 44224.5 | 35202.0  5816.0 56636.0
velocity |-2791.788  13.666  45.893 | 170.439 778.832 780.616 |-3025.028-1816.281-1780.612 |-2035.6691825.1861815.761
fuel     | 14.77 |  2.20 |  8.67 | 32.03
attitude_321 |  -0.020   0.004  -0.007 |   2.917   0.270   1.785 |  -3.141  -0.673  -3.140 |   3.141   0.674   3

In [5]:
# Batch evaluation of the agent with test_mode disabled.
# NOTE(review): the positional 5000 is presumably the number of test episodes
# and print_every the reporting interval -- confirm against Env.test_policy_batch.
env.test_policy_batch(
    agent,
    5000,
    print_every=100,
    test_mode=False,
)

worked 1
Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.74950343e-13 2.11739863e-12] 2.6615070643765496e-12
Dynamics: Max Disturbance (m/s^2):  [1.45092865e-12 2.44000809e-13 2.69017990e-12] 3.0662351949503933e-12
i (et): 100  (             217)
Cumulative Stats (mean,std,max,argmax)
thrust   |2078.14 |2629.16 |  0.00 |6936.72 |    21
 
Final Stats (mean,std,min,max)
hit_reward |     7.1 |     4.5 |     0.0 |    10.0
hit_100cm |    1.00 |    0.00 |    1.00 |    1.00
hit_50cm |    0.71 |    0.45 |    0.00 |    1.00
norm_vf  |6813.138 | 140.327 |6415.352 |7010.607
norm_rf  |     0.4 |     0.2 |     0.1 |     0.9
position | 30658.2  -431.5 49384.0 |  1101.6  2817.1  3204.7 | 28457.8 -5459.9 44487.5 | 33029.8  5254.4 55633.7
velocity |-2784.341  10.476 -48.853 | 157.753 762.764 831.041 |-2999.708-1591.469-1659.957 |-2321.4371491.1551519.196
fuel     |  8.56 |  3.28 |  2.93 | 19.73
attitude_321 |   0.155   0.022  -0.206 |   2.915   0.290   1.832 |  -3.126  -0.548  -3.121 |   3.136   

Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.74950343e-13 2.11739863e-12] 2.6615070643765496e-12
Dynamics: Max Disturbance (m/s^2):  [1.45092865e-12 2.44000809e-13 2.69017990e-12] 3.0662351949503933e-12
i (et): 900  (             213)
Cumulative Stats (mean,std,max,argmax)
thrust   |2104.62 |2635.70 |  0.00 |6936.72 |    21
 
Final Stats (mean,std,min,max)
hit_reward |     6.9 |     4.6 |     0.0 |    10.0
hit_100cm |    0.99 |    0.10 |    0.00 |    1.00
hit_50cm |    0.69 |    0.46 |    0.00 |    1.00
norm_vf  |6814.320 | 159.175 |6105.202 |7024.207
norm_rf  |     0.4 |     0.2 |     0.1 |     1.7
position | 30567.2  -173.3 49915.6 |  1098.5  3107.6  3165.9 | 28193.4 -5690.6 44223.4 | 34257.7  5626.4 55673.1
velocity |-2783.780  -5.577   3.836 | 175.444 783.606 812.233 |-3022.236-1724.052-1749.429 |-1997.8491760.6701745.312
fuel     |  8.64 |  2.95 |  1.89 | 19.73
attitude_321 |   0.020  -0.003  -0.174 |   2.911   0.281   1.795 |  -3.140  -0.643  -3.128 |   3.140   0.638   3

i (et): 1700  (             208)
Cumulative Stats (mean,std,max,argmax)
thrust   |2094.07 |2630.88 |  0.00 |6936.72 |    21
 
Final Stats (mean,std,min,max)
hit_reward |     7.1 |     4.6 |     0.0 |    10.0
hit_100cm |    0.99 |    0.11 |    0.00 |    1.00
hit_50cm |    0.71 |    0.46 |    0.00 |    1.00
norm_vf  |6819.750 | 157.418 |6062.701 |7024.207
norm_rf  |     0.4 |     0.2 |     0.1 |     2.1
position | 30515.0   -34.4 49910.5 |  1085.2  3094.3  3164.3 | 27957.6 -5786.1 44121.9 | 34257.7  5655.0 55673.1
velocity |-2792.229  -0.784   0.053 | 172.364 773.337 793.203 |-3022.236-1724.052-1777.615 |-1997.8491770.3341745.312
fuel     |  8.57 |  2.94 |  1.89 | 19.73
attitude_321 |   0.015  -0.000  -0.081 |   2.915   0.275   1.787 |  -3.141  -0.643  -3.136 |   3.140   0.702   3.128
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.74950343e-13 2.11739863e-12] 2.66150706

i (et): 2500  (             214)
Cumulative Stats (mean,std,max,argmax)
thrust   |2088.31 |2628.90 |  0.00 |6936.72 |    21
 
Final Stats (mean,std,min,max)
hit_reward |     7.0 |     4.6 |     0.0 |    10.0
hit_100cm |    0.99 |    0.11 |    0.00 |    1.00
hit_50cm |    0.70 |    0.46 |    0.00 |    1.00
norm_vf  |6818.186 | 158.171 |6062.701 |7027.536
norm_rf  |     0.4 |     0.2 |     0.0 |     3.0
position | 30520.9   -43.4 50000.7 |  1074.7  3083.8  3171.4 | 27957.6 -5786.1 44121.9 | 34257.7  5655.0 55727.2
velocity |-2790.474 -17.310   9.130 | 172.147 772.224 800.005 |-3022.236-1783.799-1777.615 |-1997.8491800.4091776.505
fuel     |  8.55 |  2.92 |  1.89 | 19.73
attitude_321 |  -0.034  -0.003  -0.069 |   2.915   0.277   1.794 |  -3.141  -0.643  -3.140 |   3.140   0.702   3.132
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.74950343e-13 2.11739863e-12] 2.66150706

i (et): 3300  (             218)
Cumulative Stats (mean,std,max,argmax)
thrust   |2089.77 |2630.88 |  0.00 |6936.72 |    21
 
Final Stats (mean,std,min,max)
hit_reward |     7.0 |     4.6 |     0.0 |    10.0
hit_100cm |    0.99 |    0.11 |    0.00 |    1.00
hit_50cm |    0.70 |    0.46 |    0.00 |    1.00
norm_vf  |6820.045 | 158.556 |6062.701 |7027.536
norm_rf  |     0.4 |     0.2 |     0.0 |     3.0
position | 30510.7   -19.8 49986.5 |  1085.1  3083.1  3151.2 | 27908.3 -5786.1 44121.9 | 34257.7  5856.2 55727.2
velocity |-2792.349  -6.836   6.130 | 172.547 770.481 794.928 |-3022.236-1783.799-1777.615 |-1975.5771800.4091875.267
fuel     |  8.56 |  2.90 |  1.79 | 19.73
attitude_321 |   0.005  -0.002  -0.040 |   2.917   0.275   1.788 |  -3.141  -0.679  -3.140 |   3.141   0.702   3.135
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.75259041e-13 2.11903340e-12] 2.66283970

i (et): 4100  (             214)
Cumulative Stats (mean,std,max,argmax)
thrust   |2091.16 |2631.13 |  0.00 |6936.72 |    21
 
Final Stats (mean,std,min,max)
hit_reward |     7.0 |     4.6 |     0.0 |    10.0
hit_100cm |    0.99 |    0.11 |    0.00 |    1.00
hit_50cm |    0.70 |    0.46 |    0.00 |    1.00
norm_vf  |6819.029 | 159.172 |6062.701 |7030.857
norm_rf  |     0.4 |     0.2 |     0.0 |     4.1
position | 30518.7   -27.6 49974.2 |  1090.0  3096.2  3142.3 | 27908.3 -5786.1 44121.9 | 34620.3  5856.2 55727.2
velocity |-2790.648  -3.150  -2.311 | 173.096 775.948 795.347 |-3022.236-1783.799-1777.756 |-1975.5771800.4091875.267
fuel     |  8.56 |  2.88 |  1.44 | 19.73
attitude_321 |   0.027   0.000  -0.042 |   2.916   0.274   1.785 |  -3.141  -0.679  -3.140 |   3.142   0.702   3.142
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.75259041e-13 2.11903340e-12] 2.66283970

i (et): 4900  (             208)
Cumulative Stats (mean,std,max,argmax)
thrust   |2096.37 |2632.75 |  0.00 |6936.72 |  4157
 
Final Stats (mean,std,min,max)
hit_reward |     7.0 |     4.6 |     0.0 |    10.0
hit_100cm |    0.99 |    0.12 |    0.00 |    1.00
hit_50cm |    0.70 |    0.46 |    0.00 |    1.00
norm_vf  |6819.593 | 159.306 |6062.701 |7031.454
norm_rf  |     0.4 |     0.2 |     0.0 |     4.1
position | 30515.6   -19.2 49991.0 |  1094.5  3085.5  3143.0 | 27908.3 -5786.1 44121.9 | 34620.3  5856.2 55928.7
velocity |-2791.030 -10.063   3.709 | 173.163 774.755 795.429 |-3025.714-1783.799-1777.756 |-1975.5771800.4091878.619
fuel     |  8.59 |  2.90 |  1.44 | 19.73
attitude_321 |   0.026  -0.002  -0.037 |   2.915   0.274   1.789 |  -3.142  -0.679  -3.140 |   3.142   0.702   3.142
w        |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000 |   0.000   0.000   0.000
Dynamics: Max Disturbance (m/s^2):  [1.58891321e-12 2.75259041e-13 2.11903340e-12] 2.66283970

In [7]:
# Quick sanity calculation: sine of a 0.01 degree angle scaled by 1000.
# NOTE(review): presumably the offset subtended by 0.01 deg at a range of
# 1000 (units not shown here) -- confirm the intended interpretation.
theta = envu.deg2rad(0.01)
np.sin(theta) * 1000

0.1745329243133368