# Test Recurrent Policy with Extreme Parameter Variation 

In [1]:
import numpy as np
import os,sys



sys.path.append('../../../RL_lib/Agents')
sys.path.append('../../../RL_lib/Policies/AWR')
sys.path.append('../../../RL_lib/Policies/Common')
sys.path.append('../../../RL_lib/Utils')
sys.path.append('../../../Env')
sys.path.append('../../../Imaging')


%load_ext autoreload
%load_ext autoreload
%autoreload 2
%matplotlib nbagg
import os
print(os.getcwd())

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/Users/briangaudet/Study/Subjects/MachineLearning/Projects/Asteroid_CPO_seeker-master/Experiments/Extended/Optimize_HF


In [2]:
%%html
<style>
/* Let output areas size to their content, capped at a maximum height. */
.output_wrapper, .output {
    height:auto !important;
    max-height:1000px;  /* maximum output height; adjust as needed */
}
/* Remove the box shadow from elements with the .output_scroll class. */
.output_scroll {
    box-shadow:none !important;
    -webkit-box-shadow:none !important;  /* vendor prefix requires the leading hyphen */
}
</style>

# Optimize Policy

In [3]:
from env import Env
import env_utils as envu
from dynamics_model import Dynamics_model
from lander_model import Lander_model
from ic_gen import Landing_icgen
import rl_utils
import attitude_utils as attu
import optics_utils as optu
from arch_policy_vf_vfu import Arch

from policy_awr import Policy
from softmax_pd import Softmax_pd as PD
from value_function import Value_function

import policy_nets as policy_nets
import valfunc_nets as valfunc_nets


from agent import Agent


import torch.nn as nn

from flat_constraint import Flat_constraint
from glideslope_constraint import Glideslope_constraint
from rh_constraint import RH_constraint
from no_attitude_constraint import Attitude_constraint
from w_constraint import W_constraint
from reward_attitude import Reward
from asteroid_hfr import Asteroid

from thruster_model_cubesat import Thruster_model

from sensor import Sensor
from seeker import Seeker

# --- Simulation components: asteroid, seeker/sensor, dynamics, thrusters, lander ---

# landing_site_range is passed to the sensor and lander model below and to Env
# later in this cell. landing_site is None, so no landing-site override is
# given to the asteroid model; the trailing comment on that line keeps a
# disabled fixed-site alternative.
landing_site_range = 0.0
landing_site = None #np.asarray([-250.,0.,0.])

# NOTE(review): omega_range is presumably the asteroid rotation-rate range
# sampled per episode -- confirm units and meaning in asteroid_hfr.py.
asteroid_model = Asteroid(landing_site_override=landing_site, omega_range=(1e-5,5e-4))

# Quaternion attitude parameterization; the same object is handed to the
# seeker, the sensor, the lander model, the initial-condition generator and
# the attitude constraint.
ap = attu.Quaternion_attitude()

# Seeker geometry and optics.
# NOTE(review): C_cb / r_cb look like the camera-to-body rotation and the
# camera offset in the body frame -- confirm in seeker.py.
C_cb = optu.rotate_optical_axis(0.0, 0.0, np.pi)
r_cb = np.asarray([0,0,0])
# Field of view of 90 degrees, converted with envu.deg2rad.
fov=envu.deg2rad(90)
seeker = Seeker(attitude_parameterization=ap, C_cb=C_cb, r_cb=r_cb,
                radome_slope_bounds=(-0.05,0.05), range_bias=(-0.05,0.05),
                   fov=fov, debug=False)
# Sensor wrapping the seeker; its output state is selected through state_type.
sensor = Sensor(seeker, attitude_parameterization=ap,  use_range=True, apf_tau1=300, use_dp=False,
                      landing_site_range=landing_site_range,
                      pool_type='max', state_type=Sensor.optflow_state_range_dp1)
# Print the sensor's tracking function before and after replacing it with
# track_func1. In the recorded output both prints show track_func1, so the
# assignment appears to leave the default unchanged in that run.
print(sensor.track_func)
sensor.track_func = sensor.track_func1
print(sensor.track_func)
logger = rl_utils.Logger()
# NOTE(review): h is presumably the integration step size -- confirm in dynamics_model.py.
dynamics_model = Dynamics_model(h=2)
thruster_model = Thruster_model(pulsed=True, scale=1.0, offset=0.4)
# NOTE(review): com_range, attitude_bias and omega_bias presumably configure a
# randomized centre-of-mass offset and attitude / angular-rate biases --
# confirm in lander_model.py.
lander_model = Lander_model(asteroid_model, thruster_model, attitude_parameterization=ap, sensor=sensor, 
                             landing_site_range=landing_site_range, com_range=(-0.10,0.10),
                              attitude_bias=0.05, omega_bias=0.05)

# Rebind get_state_agent on this instance to the get_state_agent_sensor_att_w2
# variant. This must happen after the lander model has been constructed.
lander_model.get_state_agent = lander_model.get_state_agent_sensor_att_w2

# --- Network sizes, reward configuration and episode constraints ---

# Sizes used when building the networks below: obs_dim is the input size of
# both GRU networks, and logit_dim (action_dim * actions_per_dim) is the output
# size of the policy network. action_dim and actions_per_dim are also passed to
# the softmax action distribution.
obs_dim, action_dim, actions_per_dim = 13, 12, 2
logit_dim = actions_per_dim * action_dim

# Recurrent step count handed to both GRU networks and to the agent.
recurrent_steps = 60

# Reward configuration.
# NOTE(review): the *_coeff values look like per-term weights and
# landing_rlimit / landing_vlimit like landing thresholds -- confirm in
# reward_attitude.py.
reward_object = Reward(
    landing_rlimit=2,
    landing_vlimit=0.1,
    tracking_bias=0.01,
    fov_coeff=-50.,
    att_coeff=-0.20,
    tracking_coeff=-0.5,
    magv_coeff=-1.0,
    fuel_coeff=-0.10,
    landing_coeff=10.0,
)

# Constraint objects, constructed in the same order as before so that any
# banners printed by their constructors keep their order.
glideslope_constraint = Glideslope_constraint(gs_limit=-1.0)
# NOTE(review): shape_constraint is created here but is not passed to Env in
# this cell -- confirm whether that is intended.
shape_constraint = Flat_constraint()
attitude_constraint = Attitude_constraint(ap)
w_constraint = W_constraint(
    w_limit=(0.1, 0.1, 0.1),
    w_margin=(0.05, 0.05, 0.05),
)
rh_constraint = RH_constraint(rh_limit=150)

# --- Initial-condition generator and environment ---

# Symmetric bound used for the lander_wll / lander_wul limits below.
wi = 0.05

# Initial-condition generator.
# NOTE(review): the first positional argument (800, 1000) and the *_error
# tuples are presumably (min, max) sampling ranges -- confirm in ic_gen.py.
ic_gen = Landing_icgen(
    (800, 1000),
    p_engine_fail=0.5,
    engine_fail_scale=(0.5, 1.0),
    lander_wll=(-wi, -wi, -wi),
    lander_wul=(wi, wi, wi),
    attitude_parameterization=ap,
    position_error=(0, np.pi / 4),
    heading_error=(0, np.pi / 8),
    attitude_error=(0, np.pi / 16),
    min_mass=450,
    max_mass=500,
    mag_v=(0.05, 0.1),
    debug=False,
    inertia_uncertainty_diag=10.0,
    inertia_uncertainty_offdiag=1.0,
)

# Environment tying together the initial-condition generator, lander model,
# dynamics model, logger, reward object and the constraints defined earlier in
# this cell.
env = Env(
    ic_gen,
    lander_model,
    dynamics_model,
    logger,
    landing_site_range=landing_site_range,
    debug_done=False,
    reward_object=reward_object,
    glideslope_constraint=glideslope_constraint,
    attitude_constraint=attitude_constraint,
    w_constraint=w_constraint,
    rh_constraint=rh_constraint,
    tf_limit=5000.0,
    print_every=10,
    nav_period=6,
)

# Display the environment's initial-condition generator settings.
env.ic_gen.show()

# --- Policy, value function, agent, and the training run ---

arch = Arch()

# AWR policy: a GRU network mapping obs_dim inputs to logit_dim outputs,
# paired with a softmax action distribution over
# (action_dim, actions_per_dim).
policy = Policy(
    policy_nets.GRU1(
        obs_dim,
        logit_dim,
        recurrent_steps=recurrent_steps,
        output_network_scale=5,
    ),
    PD(action_dim, actions_per_dim),
    shuffle=False,
    max_grad_norm=30,
    rollout_limit=3,
    kl_limit=0.5,
    init_func=rl_utils.xn_init,
)
# Disabled alternative policy configuration, kept for reference:
# policy = Policy(policy_nets.GRU1(obs_dim, logit_dim, recurrent_steps=recurrent_steps),
#                 PD(action_dim, actions_per_dim),
#                 shuffle=False,
#                 kl_targ=0.001, epochs=20, beta=0.1, servo_kl=True, max_grad_norm=30, scale_vector_obs=True,
#                 init_func=rl_utils.xn_init)

# Value function backed by its own GRU network over the same observations.
value_function = Value_function(
    valfunc_nets.GRU1(obs_dim, recurrent_steps=recurrent_steps),
    scale_obs=True,
    shuffle=False,
    batch_size=9999999,
    max_grad_norm=30,
    verbose=False,
)

# NOTE(review): the fourth positional argument to Agent is None; its meaning
# is defined in agent.py.
agent = Agent(
    arch,
    policy,
    value_function,
    None,
    env,
    logger,
    policy_episodes=30,
    policy_steps=3000,
    gamma1=0.95,
    gamma2=0.995,
    recurrent_steps=recurrent_steps,
    monitor=env.rl_stats,
)

# Start training; this is the long-running step that produces the log output
# recorded below this cell.
# NOTE(review): the argument is presumably the number of training episodes --
# confirm in agent.py.
agent.train(120000)

Quaternion_attitude
Euler321 Attitude
C_cb: 
[[ 1.0000000e+00  0.0000000e+00 -0.0000000e+00]
 [ 0.0000000e+00 -1.0000000e+00  1.2246468e-16]
 [ 0.0000000e+00 -1.2246468e-16 -1.0000000e+00]]
[ 0.0000000e+00 -1.2246468e-16 -1.0000000e+00]
using max  pooling
V4: Output State type:  <function Sensor.optflow_state_range_dp1 at 0x137bdb378>
<bound method Sensor.track_func1 of <sensor.Sensor object at 0x1286db550>>
<bound method Sensor.track_func1 of <sensor.Sensor object at 0x1286db550>>
6dof dynamics model 
thruster model: 
Inertia Tensor:  [[333.33333333   0.           0.        ]
 [  0.         333.33333333   0.        ]
 [  0.           0.         333.33333333]]
Lander Model: 
Reward_terminal equator
queue fixed
Flat Constraint
Attitude Constraint
Rotational Velocity Constraint
Position Hysterises Constraint


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

lander env RHL
Landing_icgen:
[[-1.]
 [ 1.]]
AWR Policy: 
	xn_init: layer  Linear(in_features=13, out_features=130, bias=True)
	xn_init: layer  GRUCell(130, 124)
	xn_init: layer  Linear(in_features=124, out_features=120, bias=True)
	xn_init: layer  Linear(in_features=120, out_features=24, bias=True)
Policy: recurrent steps > 1, disabling shuffle
	Test Mode:          False
	Shuffle :           False
	Shuffle by Chunks:  False
	Max Grad Norm:      30
	Recurrent Steps:    60
	Rollout Limit:      3
	Advantage Func:     <advantage_utils.Adv_relu object at 0x139c76fd0>
	Advantage Norm:     <function Adv_normalizer.apply at 0x137070378>
	PD:                 <softmax_pd.Softmax_pd object at 0x139c76f28>
Value Funtion
	xn_init: layer  Linear(in_features=13, out_features=130, bias=True)
	xn_init: layer  GRUCell(130, 25)
	xn_init: layer  Linear(in_features=25, out_features=5, bias=True)
	xn_init: layer  Linear(in_features=5, out_features=1, bias=True)
Value Function: recurrent steps > 1, disablin

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2872   0.1419   0.5379   9.0125   2.2441   3.0464
ADVA:  (6117,) (9250,) 0.6612972972972972
ADV1:  0.0 0.07339359866792662 0.907939375824416 1.8542159388731103 -3.670206477460087
ADVB:  (6001,) (9250,) 0.6487567567567567
ADV2:  0.3606397042038091 0.4245461084109837 0.4154904976501037 1.7449606560490876 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0355   0.0092   0.0495   0.1346   0.1104   0.0175
***** Episode 243, Mean R = -212.7  Std R = 71.4  Min R = -455.9
PolicyLoss: 7.02
Policy_Entropy: 0.000351
Policy_KL: 0.00486
Policy_SD: 0.955
Steps: 3.15e+03
TotalSteps: 2.14e+04
VF_0_ExplainedVarNew: 0.134
VF_0_ExplainedVarOld: 0.111
VF_0_Loss : 0.737


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1582   0.0614   0.3193   9.0125   2.2441   3.0464
ADVA:  (6057,) (9325,) 0.6495442359249329
ADV1:  0.0 0.050574680237576 0.9115491515439658 1.8542159388731103 -3.670206477460087
ADVB:  (5910,) (9325,) 0.6337801608579089
ADV2:  

***** Episode 463, Mean R = -370.5  Std R = 187.2  Min R = -1002.0
PolicyLoss: 5.37
Policy_Entropy: 0.000397
Policy_KL: 0.00311
Policy_SD: 0.955
Steps: 7.76e+03
TotalSteps: 5.77e+04
VF_0_ExplainedVarNew: 0.167
VF_0_ExplainedVarOld: 0.138
VF_0_Loss : 0.351


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0824   0.0302   0.1562   9.0125   2.2441   3.0464
ADVA:  (16042,) (21437,) 0.7483323226197696
ADV1:  0.20635248917171267 0.07616789296875445 0.6234439753533787 1.4604584427396836 -4.340532236986319
ADVB:  (15497,) (21437,) 0.7229089891309418
ADV2:  0.29685841419527326 0.3421028620227086 0.3265152041824085 2.170444147505475 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0160   0.0046   0.0234   0.1346   0.1104   0.0175
***** Episode 494, Mean R = -360.1  Std R = 197.8  Min R = -879.9
PolicyLoss: 5.04
Policy_Entropy: 0.000405
Policy_KL: 0.00342
Policy_SD: 0.955
Steps: 7.3e+03
TotalSteps: 6.5e+04
VF_0_ExplainedVarNew: 0.178
VF_0_ExplainedVarOld: 0.165
VF_0_Loss : 0.39


Val

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0226   0.0086   0.0389   0.1346   0.1104   0.0175
***** Episode 711, Mean R = -469.2  Std R = 146.2  Min R = -765.3
PolicyLoss: 5.57
Policy_Entropy: 0.000449
Policy_KL: 0.00428
Policy_SD: 0.955
Steps: 1.11e+04
TotalSteps: 1.37e+05
VF_0_ExplainedVarNew: 0.407
VF_0_ExplainedVarOld: 0.394
VF_0_Loss : 0.0956


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0372   0.0148   0.0710   9.0125   2.2441   3.0464
ADVA:  (18912,) (32492,) 0.5820509663917272
ADV1:  0.01979821475880117 -0.000692938174091515 0.26989402837471443 1.9853315295528713 -4.177054654117923
ADVB:  (19438,) (32492,) 0.5982395666625631
ADV2:  0.14748313020888734 0.31215832843618735 0.39897223624795936 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0242   0.0087   0.0446   0.1346   0.1104   0.0175
***** Episode 742, Mean R = -431.6  Std R = 181.5  Min R = -995.9
PolicyLoss: 5.52
Policy_Entropy: 0.000459
Policy_KL: 0.00404
Policy_SD: 0.955
Steps: 1.01e+04
Tot

w        |    0.00   -0.00    0.00 |    0.03    0.03    0.03 |   -0.10   -0.10   -0.10 |    0.10    0.10    0.10
a_f      |   -0.03   -0.06 |    0.69    1.82 |   -1.49   -3.13 |    1.51    3.10
w_f      |   -0.00    0.00    0.00 |    0.03    0.03    0.03 |   -0.10   -0.07   -0.10 |    0.10    0.10    0.10
w_rewards |   -5.38 |    4.57 |  -27.54 |   -0.05
w_penalty |   -3.87 |   19.29 | -100.00 |    0.00
fov_penalty |  -15.97 |   23.31 |  -50.00 |    0.00
theta_cv |    1.52 |    0.69 |    0.01 |    3.14
seeker_angles |    0.03    0.01 |    0.32    0.33 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0289  0.0056 |  0.3175  0.3320 | -1.0000 -1.0000 |  1.0000  0.9999
optical_flow |  0.0001  0.0000 |  0.0022  0.0022 | -0.0694 -0.0344 |  0.0197  0.0346
v_err    | -0.4538 |  0.1786 | -1.1671 |  0.6240
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |  877.10 |  254.73 |   79.90 | 1272.20
tracking_rewards | -392.38 |  160.63 | -954.25 |  -28.69
steps    |     368 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0459   0.0189   0.0978   0.1346   0.1104   0.0236
Update Cnt = 40    ET =   1334.3   Stats:  Mean, Std, Min, Max
r_f      | -105.61  -22.12  -14.44 |  560.98  522.93  630.36 |-1350.42-1448.01-1344.32 | 1400.78 1477.42 1394.00
v_f      |   -0.01   -0.04    0.02 |    0.38    0.38    0.38 |   -0.94   -1.03   -0.90 |    1.07    0.87    1.20
r_i      | -107.61   22.32  -12.52 |  689.86  637.37  778.79 |-1366.29-1248.27-1310.09 | 1285.58 1318.91 1293.22
v_i      |    0.01   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.09    0.09    0.09
norm_rf  |  728.65 |  288.41 |   28.40 | 1224.88
norm_vf  |    0.62 |    0.21 |    0.15 |    1.22
gs_f     |    0.94 |    1.11 |    0.01 |   10.24
thrust   |    0.01   -0.01    0.00 |    0.98    0.99    0.99 |   -3.46   -3.40   -3.44 |    3.41    3.33    3.38
norm_thrust |    1.58 |    0.65 |    0.00 |    3.46
fuel     |    6.23 |    2.06 |    2.30 |   13.85
rewards  | -430.62 | 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0715   0.0218   0.1259   0.1346   0.1104   0.0236
***** Episode 1548, Mean R = -195.4  Std R = 24.4  Min R = -249.1
PolicyLoss: 5.42
Policy_Entropy: 0.0009
Policy_KL: 0.0115
Policy_SD: 0.957
Steps: 7.45e+03
TotalSteps: 4.2e+05
VF_0_ExplainedVarNew: 0.763
VF_0_ExplainedVarOld: 0.728
VF_0_Loss : 0.0946


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0571   0.0234   0.1207   9.0125   2.2441   3.0464
ADVA:  (13110,) (22561,) 0.5810912636851203
ADV1:  0.0038940239609819016 0.01257426617465924 0.1981173379898629 2.4133423233032225 -1.3012385289094701
ADVB:  (11253,) (22561,) 0.49878108239882984
ADV2:  0.0 0.272504945533091 0.5160362620021516 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0941   0.0335   0.1645   0.1645   0.1104   0.0335
***** Episode 1579, Mean R = -198.6  Std R = 24.7  Min R = -244.1
PolicyLoss: 5.51
Policy_Entropy: 0.000925
Policy_KL: 0.0121
Policy_SD: 0.958
Steps: 7.63e+03
TotalSteps: 4.28e+05
VF_0_

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0606   0.0177   0.0996   0.1645   0.1104   0.0335
***** Episode 1796, Mean R = -172.5  Std R = 28.0  Min R = -251.9
PolicyLoss: 5.69
Policy_Entropy: 0.00129
Policy_KL: 0.0135
Policy_SD: 0.958
Steps: 7.4e+03
TotalSteps: 4.79e+05
VF_0_ExplainedVarNew: 0.909
VF_0_ExplainedVarOld: 0.883
VF_0_Loss : 0.0767


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0355   0.0112   0.0619   9.0125   2.2441   3.0464
ADVA:  (12211,) (21478,) 0.5685352453673527
ADV1:  0.0011631749923757817 0.01265879670296177 0.1645386729812487 1.9287289479402296 -1.062441606726693
ADVB:  (10943,) (21478,) 0.509498091069932
ADV2:  0.01301818239411048 0.2791389445455969 0.5306679028319793 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0714   0.0258   0.1288   0.1645   0.1104   0.0335
***** Episode 1827, Mean R = -168.2  Std R = 21.4  Min R = -213.8
PolicyLoss: 5.38
Policy_Entropy: 0.00134
Policy_KL: 0.012
Policy_SD: 0.958
Steps: 7.16e+03
TotalSteps: 4

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0978   0.0375   0.1940   0.1940   0.1104   0.0375
***** Episode 2044, Mean R = -151.6  Std R = 24.1  Min R = -198.3
PolicyLoss: 4.55
Policy_Entropy: 0.00179
Policy_KL: 0.0109
Policy_SD: 0.958
Steps: 7.51e+03
TotalSteps: 5.38e+05
VF_0_ExplainedVarNew: 0.856
VF_0_ExplainedVarOld: 0.84
VF_0_Loss : 0.0747


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0621   0.0249   0.1131   9.0125   2.2441   3.0464
ADVA:  (12284,) (22484,) 0.5463440668920121
ADV1:  0.0 -0.006442269021297327 0.15563859665513366 2.692490450959979 -1.3091996637980166
ADVB:  (10978,) (22484,) 0.4882583170254403
ADV2:  0.0 0.223911838690093 0.4840801102151578 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0920   0.0333   0.1763   0.1940   0.1104   0.0375
***** Episode 2075, Mean R = -155.6  Std R = 22.8  Min R = -215.3
PolicyLoss: 4.38
Policy_Entropy: 0.00183
Policy_KL: 0.0102
Policy_SD: 0.959
Steps: 7.67e+03
TotalSteps: 5.45e+05
VF_0_ExplainedVarNew: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0586   0.0151   0.0957   0.2312   0.1215   0.0416
***** Episode 2292, Mean R = -150.4  Std R = 15.7  Min R = -193.5
PolicyLoss: 4.91
Policy_Entropy: 0.00234
Policy_KL: 0.0115
Policy_SD: 0.961
Steps: 7.98e+03
TotalSteps: 6e+05
VF_0_ExplainedVarNew: 0.94
VF_0_ExplainedVarOld: 0.922
VF_0_Loss : 0.0606


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0812   0.0359   0.1608   9.0125   2.2441   3.0464
ADVA:  (13077,) (24568,) 0.5322777596873982
ADV1:  0.0 0.00563783174178048 0.1442380508331108 1.8494375038146973 -1.2254886945088725
ADVB:  (13515,) (24568,) 0.5501058287202866
ADV2:  0.07043932571805887 0.3107919750084503 0.5789401567472151 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1245   0.0458   0.2339   0.2339   0.1245   0.0458
***** Episode 2323, Mean R = -148.2  Std R = 22.0  Min R = -195.9
PolicyLoss: 5.31
Policy_Entropy: 0.0023
Policy_KL: 0.0135
Policy_SD: 0.96
Steps: 8.1e+03
TotalSteps: 6.09e+05
VF_0_Explaine

***** Episode 2540, Mean R = -154.0  Std R = 22.6  Min R = -209.9
PolicyLoss: 3.91
Policy_Entropy: 0.00276
Policy_KL: 0.0144
Policy_SD: 0.961
Steps: 8.73e+03
TotalSteps: 6.68e+05
VF_0_ExplainedVarNew: 0.862
VF_0_ExplainedVarOld: 0.848
VF_0_Loss : 0.0589


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0521   0.0199   0.1074   9.0125   2.2441   3.0464
ADVA:  (14017,) (25588,) 0.5477958418008442
ADV1:  0.008798056890530842 0.012009455776483545 0.17953327003277184 2.432137457529704 -1.2570345433553083
ADVB:  (15235,) (25588,) 0.5953962795060185
ADV2:  0.07728398288257923 0.24961957503426718 0.5219092208763819 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0728   0.0206   0.1285   0.2711   0.1405   0.0565
***** Episode 2571, Mean R = -144.4  Std R = 23.9  Min R = -200.8
PolicyLoss: 3.82
Policy_Entropy: 0.00288
Policy_KL: 0.0114
Policy_SD: 0.96
Steps: 8.46e+03
TotalSteps: 6.76e+05
VF_0_ExplainedVarNew: 0.785
VF_0_ExplainedVarOld: 0.73
VF_0_Loss : 0.0675


ValFun  Gradien

w        |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.05   -0.06 |    0.08    0.06    0.06
a_f      |   -0.02   -0.13 |    0.68    1.80 |   -1.49   -3.13 |    1.48    3.11
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.02 |   -0.03   -0.04   -0.06 |    0.08    0.05    0.06
w_rewards |   -0.01 |    0.09 |   -1.42 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |  -47.42 |   11.06 |  -50.00 |    0.00
theta_cv |    0.63 |    0.33 |    0.00 |    3.13
seeker_angles |   -0.08   -0.05 |    0.32    0.35 |   -1.00   -1.00 |    1.00    1.00
cs_angles | -0.0815 -0.0539 |  0.3207  0.3519 | -1.0000 -1.0000 |  1.0000  1.0000
optical_flow | -0.0011 -0.0007 |  0.0085  0.0103 | -0.5022 -0.5243 |  0.2362  0.4105
v_err    | -0.0275 |  0.1021 | -0.6925 |  0.9653
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |  211.61 |  135.66 |    4.88 |  726.01
tracking_rewards |  -92.49 |   23.60 | -201.63 |  -47.57
steps    |     281 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1167   0.0524   0.2339   0.2901   0.1458   0.0633
Update Cnt = 100    ET =   1065.6   Stats:  Mean, Std, Min, Max
r_f      |   31.90  -14.06    9.09 |  270.23  262.81  297.10 | -711.67 -827.93 -684.78 |  873.04  586.68  820.05
v_f      |    0.06    0.01   -0.01 |    0.41    0.38    0.41 |   -1.27   -0.88   -1.05 |    1.32    1.05    0.98
r_i      |  -30.02   49.65    2.33 |  696.81  682.16  729.40 |-1289.09-1278.24-1198.23 | 1282.30 1260.63 1244.96
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.04 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |  190.75 |  133.61 |    9.03 |  631.14
norm_vf  |    0.65 |    0.24 |    0.25 |    1.58
gs_f     |    0.99 |    1.27 |    0.02 |   14.72
thrust   |    0.01    0.01   -0.00 |    0.97    0.99    0.99 |   -3.40   -3.44   -3.44 |    3.45    3.37    3.42
norm_thrust |    1.57 |    0.67 |    0.00 |    3.46
fuel     |    4.63 |    0.87 |    2.45 |    6.20
rewards  | -147.27 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1367   0.0446   0.2596   0.2901   0.1458   0.0633
***** Episode 3408, Mean R = -150.3  Std R = 28.5  Min R = -245.9
PolicyLoss: 4.21
Policy_Entropy: 0.00564
Policy_KL: 0.0124
Policy_SD: 0.958
Steps: 9.08e+03
TotalSteps: 9.17e+05
VF_0_ExplainedVarNew: 0.852
VF_0_ExplainedVarOld: 0.832
VF_0_Loss : 0.0928


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0246   0.0071   0.0428   9.0125   2.2441   3.0464
ADVA:  (15019,) (27804,) 0.5401740756725651
ADV1:  0.0 0.0070256358346811555 0.16505253376895077 2.1300205262502034 -1.3760358715057395
ADVB:  (12969,) (27804,) 0.4664436771687527
ADV2:  0.0 0.22876911421568547 0.5490527125599868 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1016   0.0362   0.1954   0.2901   0.1458   0.0633
***** Episode 3439, Mean R = -145.6  Std R = 21.7  Min R = -201.4
PolicyLoss: 4.15
Policy_Entropy: 0.00561
Policy_KL: 0.0109
Policy_SD: 0.958
Steps: 9.64e+03
TotalSteps: 9.27e+05
VF_0_ExplainedVarN

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1101   0.0390   0.1953   0.3961   0.1976   0.0758
***** Episode 3656, Mean R = -152.2  Std R = 26.1  Min R = -230.9
PolicyLoss: 3.5
Policy_Entropy: 0.00695
Policy_KL: 0.0111
Policy_SD: 0.959
Steps: 9.70e+03
TotalSteps: 9.92e+05
VF_0_ExplainedVarNew: 0.929
VF_0_ExplainedVarOld: 0.911
VF_0_Loss : 0.0526


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0209   0.0077   0.0393   9.0125   2.2441   3.0464
ADVA:  (17916,) (28794,) 0.6222129610335486
ADV1:  0.0048982743017153 0.0028710539905437912 0.1465923751636055 2.3044414413336467 -1.7007102688153606
ADVB:  (14256,) (28794,) 0.49510314648885184
ADV2:  0.0 0.20409160025719425 0.4535102147723788 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1358   0.0511   0.2696   0.3961   0.1976   0.0758
***** Episode 3687, Mean R = -143.5  Std R = 17.1  Min R = -174.6
PolicyLoss: 3.43
Policy_

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1816   0.0647   0.3135   0.3961   0.1976   0.0758
***** Episode 3904, Mean R = -143.3  Std R = 20.8  Min R = -177.8
PolicyLoss: 3.76
Policy_Entropy: 0.00719
Policy_KL: 0.0144
Policy_SD: 0.959
Steps: 9.48e+03
TotalSteps: 1.07e+06
VF_0_ExplainedVarNew: 0.88
VF_0_ExplainedVarOld: 0.861
VF_0_Loss : 0.0684


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0373   0.0160   0.0820   9.0125   2.2441   3.0464
ADVA:  (15202,) (29345,) 0.5180439597887204
ADV1:  0.0 0.005297765580863377 0.15869278504693873 2.4305920892385124 -1.1275546566645325
ADVB:  (13658,) (29345,) 0.4654285227466349
ADV2:  0.0 0.21543993802225225 0.506615457823106 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1306   0.0403   0.2370   0.3961   0.1976   0.0758
***** Episode 3935, Mean R = -141.0  Std R = 22.0  Min R = -193.7
PolicyLoss: 3.76
Policy_Entropy: 0.00731
Policy_KL: 0.0167
Policy_SD: 0.959
Steps: 1.01e+04
TotalSteps: 1.08e+06
VF_0_ExplainedVarNew:

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1604   0.0568   0.3024   0.4871   0.2514   0.0982
***** Episode 4152, Mean R = -140.2  Std R = 18.4  Min R = -183.5
PolicyLoss: 3.39
Policy_Entropy: 0.00904
Policy_KL: 0.0213
Policy_SD: 0.958
Steps: 9.66e+03
TotalSteps: 1.15e+06
VF_0_ExplainedVarNew: 0.896
VF_0_ExplainedVarOld: 0.877
VF_0_Loss : 0.0651


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0253   0.0098   0.0404   9.0125   2.2441   3.0464
ADVA:  (17039,) (29105,) 0.5854320563477066
ADV1:  0.0 -0.0029315534486440867 0.16141052929087715 1.9101323763529459 -1.6240312178929668
ADVB:  (13435,) (29105,) 0.4616045353032125
ADV2:  0.0 0.20376982747274022 0.4838505683484484 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1823   0.0697   0.3747   0.4871   0.2514   0.0982
***** Episode 4183, Mean R = -142.5  Std R = 18.7  Min R = -180.6
PolicyLoss: 3.52
Policy_Entropy: 0.00908
Policy_KL: 0.0226
Policy_SD: 0.957
Steps: 9.49e+03
TotalSteps: 1.16e+06
VF_0_ExplainedVar

***** Episode 4400, Mean R = -139.5  Std R = 20.9  Min R = -226.2
PolicyLoss: 3.04
Policy_Entropy: 0.0097
Policy_KL: 0.0226
Policy_SD: 0.953
Steps: 9.78e+03
TotalSteps: 1.22e+06
VF_0_ExplainedVarNew: 0.87
VF_0_ExplainedVarOld: 0.839
VF_0_Loss : 0.0732


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0389   0.0167   0.0800   9.0125   2.2441   3.0464
ADVA:  (14322,) (29941,) 0.4783407367823386
ADV1:  0.0 -0.0028386859998449066 0.19035733542147978 2.492138204574585 -1.6504945182800315
ADVB:  (15432,) (29941,) 0.515413646838783
ADV2:  0.01111762846281065 0.1973288472718947 0.4738345323055411 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1461   0.0486   0.2615   0.4871   0.2665   0.0982
***** Episode 4431, Mean R = -139.3  Std R = 22.0  Min R = -173.6
PolicyLoss: 2.98
Policy_Entropy: 0.0101
Policy_KL: 0.0187
Policy_SD: 0.954
Steps: 1.01e+04
TotalSteps: 1.23e+06
VF_0_ExplainedVarNew: 0.763
VF_0_ExplainedVarOld: 0.69
VF_0_Loss : 0.0902


ValFun  Gradients: u/sd/Max/C Max/

attitude |    0.08    0.06   -0.04 |    1.24    0.68    1.86 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.05   -0.07   -0.05 |    0.05    0.08    0.05
a_f      |    0.05   -0.12 |    0.72    1.94 |   -1.53   -3.13 |    1.45    3.13
w_f      |   -0.00    0.00   -0.00 |    0.01    0.02    0.01 |   -0.04   -0.07   -0.04 |    0.04    0.08    0.03
w_rewards |   -0.01 |    0.06 |   -0.72 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |  -43.06 |   17.28 |  -50.00 |    0.00
theta_cv |    0.62 |    0.34 |    0.00 |    2.85
seeker_angles |   -0.01   -0.04 |    0.30    0.30 |   -1.00   -1.00 |    1.00    1.00
cs_angles | -0.0123 -0.0387 |  0.2969  0.3008 | -0.9995 -0.9993 |  0.9992  0.9999
optical_flow | -0.0004 -0.0000 |  0.0141  0.0135 | -0.4028 -0.6243 |  0.5592  0.3804
v_err    | -0.0233 |  0.0842 | -0.8345 |  0.5953
landing_rewards |    0.03 |    0.57 |    0.00 |   10.00
landing_margin |   96

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2454   0.0926   0.5159   9.0186   3.8139   2.4432
Update Cnt = 160    ET =   1279.2   Stats:  Mean, Std, Min, Max
r_f      |   -3.73  -16.73    0.99 |  221.05  196.62  238.06 | -703.43 -679.90 -514.63 |  674.87  633.43  717.74
v_f      |    0.04    0.02   -0.01 |    0.29    0.29    0.28 |   -0.89   -0.80   -0.97 |    1.11    0.95    0.92
r_i      |  -49.50  -49.79   20.22 |  683.35  652.06  768.12 |-1299.42-1311.86-1313.83 | 1313.22 1315.66 1312.58
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.10    0.09
norm_rf  |   83.05 |   84.88 |    1.36 |  465.12
norm_vf  |    0.45 |    0.22 |    0.06 |    1.21
gs_f     |    0.85 |    0.89 |    0.03 |    7.12
thrust   |    0.01    0.01   -0.00 |    0.96    0.95    0.96 |   -3.29   -3.12   -3.46 |    3.42    3.42    3.27
norm_thrust |    1.51 |    0.69 |    0.00 |    3.46
fuel     |    4.75 |    0.58 |    2.31 |    6.00
rewards  | -136.49 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2055   0.0713   0.4291   9.0186   3.8139   2.4432
***** Episode 5268, Mean R = -138.8  Std R = 33.6  Min R = -234.2
PolicyLoss: 2.8
Policy_Entropy: 0.0158
Policy_KL: 0.011
Policy_SD: 0.95
Steps: 1.1e+04
TotalSteps: 1.51e+06
VF_0_ExplainedVarNew: 0.783
VF_0_ExplainedVarOld: 0.756
VF_0_Loss : 0.0663


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0339   0.0091   0.0577   9.0125   2.2441   3.0464
ADVA:  (18405,) (32195,) 0.5716726199720453
ADV1:  0.0 -0.003734783161893535 0.18692190659961885 1.9675049463907877 -1.9800424694658765
ADVB:  (15505,) (32195,) 0.4815965211989439
ADV2:  0.0 0.1715579077972405 0.4840859830178794 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1667   0.0559   0.3221   9.0186   3.8139   2.4432
***** Episode 5299, Mean R = -137.2  Std R = 23.2  Min R = -186.3
PolicyLoss: 2.58
Policy_Entropy: 0.0155
Policy_KL: 0.0126
Policy_SD: 0.951
Steps: 1.08e+04
TotalSteps: 1.52e+06
VF_0_ExplainedVarNew: 0.8

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2673   0.0985   0.4766   9.0186   3.8139   2.4432
***** Episode 5516, Mean R = -132.7  Std R = 34.4  Min R = -194.6
PolicyLoss: 3.04
Policy_Entropy: 0.0166
Policy_KL: 0.0127
Policy_SD: 0.95
Steps: 1.06e+04
TotalSteps: 1.6e+06
VF_0_ExplainedVarNew: 0.753
VF_0_ExplainedVarOld: 0.701
VF_0_Loss : 0.0608


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0495   0.0176   0.0958   9.0125   2.2441   3.0464
ADVA:  (15664,) (31999,) 0.4895152973530423
ADV1:  -0.0029810529805553293 0.0005868409745422591 0.21465214489236115 2.4187400913238526 -1.8423762949307783
ADVB:  (14812,) (31999,) 0.4628894652957905
ADV2:  0.0 0.19590918889859102 0.5277102389400282 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2951   0.1040   0.4847   9.0186   3.8139   2.4432
***** Episode 5547, Mean R = -131.8  Std R = 33.5  Min R = -232.4
PolicyLoss: 3.01
Policy_Entropy: 0.0163
Policy_KL: 0.0122
Policy_SD: 0.951
Steps: 1.1e+04
TotalSteps: 1.61e+06
VF_0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1495   0.0510   0.2510   9.0186   3.8139   2.4432
***** Episode 5764, Mean R = -127.6  Std R = 35.7  Min R = -193.3
PolicyLoss: 2.71
Policy_Entropy: 0.017
Policy_KL: 0.00926
Policy_SD: 0.951
Steps: 1.07e+04
TotalSteps: 1.68e+06
VF_0_ExplainedVarNew: 0.687
VF_0_ExplainedVarOld: 0.544
VF_0_Loss : 0.0615


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0479   0.0204   0.1028   9.0125   2.2441   3.0464
ADVA:  (14416,) (31664,) 0.4552804446690248
ADV1:  -0.0027248903242319683 -0.00799929939566407 0.20496380685993634 2.536799221038818 -2.7007034898438427
ADVB:  (16093,) (31664,) 0.5082427993936331
ADV2:  0.004081023943925244 0.20131781262306325 0.5096337008494076 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2332   0.0821   0.3790   9.0186   3.8139   2.4432
***** Episode 5795, Mean R = -126.6  Std R = 28.5  Min R = -166.0
PolicyLoss: 2.83
Policy_Entropy: 0.0174
Policy_KL: 0.01
Policy_SD: 0.95
Steps: 1.04e+04
TotalSteps

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1678   0.0548   0.2709   9.0186   3.8139   2.4432
***** Episode 6012, Mean R = -120.5  Std R = 32.0  Min R = -180.6
PolicyLoss: 2.79
Policy_Entropy: 0.0174
Policy_KL: 0.00855
Policy_SD: 0.949
Steps: 1.06e+04
TotalSteps: 1.77e+06
VF_0_ExplainedVarNew: 0.657
VF_0_ExplainedVarOld: 0.619
VF_0_Loss : 0.0625


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0235   0.0070   0.0405   9.0125   2.2441   3.0464
ADVA:  (17033,) (32671,) 0.5213492087784274
ADV1:  0.005918689535514957 0.006991278413693119 0.20207909340531263 2.029977617263794 -1.7626346087455773
ADVB:  (20723,) (32671,) 0.6342934100578494
ADV2:  0.06961210300360757 0.23688082329669016 0.5221544414544286 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2435   0.0823   0.4404   9.0186   3.8139   2.4432
***** Episode 6043, Mean R = -130.3  Std R = 32.9  Min R = -176.4
PolicyLoss: 2.67
Policy_Entropy: 0.0179
Policy_KL: 0.00732
Policy_SD: 0.948
Steps: 1.09e+04
TotalSte

***** Episode 6260, Mean R = -111.4  Std R = 32.7  Min R = -170.4
PolicyLoss: 2.78
Policy_Entropy: 0.018
Policy_KL: 0.0108
Policy_SD: 0.944
Steps: 1.08e+04
TotalSteps: 1.86e+06
VF_0_ExplainedVarNew: 0.691
VF_0_ExplainedVarOld: 0.616
VF_0_Loss : 0.0566


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0341   0.0248   0.1245   9.0125   2.2441   3.0464
ADVA:  (15518,) (32553,) 0.47669953614106225
ADV1:  0.0 0.0018889131728466897 0.19993161373674062 2.4813521194458006 -2.5417571096327007
ADVB:  (19163,) (32553,) 0.5886707830307498
ADV2:  0.054639875655562796 0.24632422278261407 0.5285583110886384 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9830   0.4009   1.7644   9.0186   3.8139   2.4432
***** Episode 6291, Mean R = -115.2  Std R = 33.7  Min R = -168.7
PolicyLoss: 2.94
Policy_Entropy: 0.0181
Policy_KL: 0.0116
Policy_SD: 0.944
Steps: 1.08e+04
TotalSteps: 1.87e+06
VF_0_ExplainedVarNew: 0.633
VF_0_ExplainedVarOld: 0.494
VF_0_Loss : 0.0581


ValFun  Gradients: u/sd/Max/C

attitude |   -0.10    0.03    0.09 |    1.18    0.67    1.80 |   -3.14   -1.56   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.05   -0.09   -0.06 |    0.05    0.09    0.05
a_f      |   -0.02    0.05 |    0.68    1.76 |   -1.45   -3.10 |    1.48    3.14
w_f      |    0.00   -0.00    0.00 |    0.01    0.03    0.02 |   -0.05   -0.09   -0.04 |    0.04    0.09    0.05
w_rewards |   -0.04 |    0.14 |   -1.12 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |  -30.00 |   24.49 |  -50.00 |    0.00
theta_cv |    0.57 |    0.33 |    0.00 |    3.09
seeker_angles |   -0.02   -0.01 |    0.29    0.28 |   -1.00   -1.00 |    1.00    1.00
cs_angles | -0.0219 -0.0114 |  0.2929  0.2801 | -0.9996 -0.9996 |  0.9997  0.9999
optical_flow | -0.0001  0.0000 |  0.0195  0.0192 | -0.7640 -1.2154 |  0.7808  0.6662
v_err    | -0.0205 |  0.0750 | -0.5190 |  0.5229
landing_rewards |    0.06 |    0.80 |    0.00 |   10.00
landing_margin |   46

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2512   0.0978   0.4292  22.0738   6.9309   5.4641
Update Cnt = 220    ET =   1458.2   Stats:  Mean, Std, Min, Max
r_f      |  -15.07   -2.60    9.98 |  195.75  182.63  212.11 | -439.42 -524.25 -629.66 |  555.99  386.25  631.64
v_f      |   -0.01    0.00   -0.01 |    0.24    0.24    0.23 |   -0.96   -0.83   -0.69 |    0.65    0.89    0.75
r_i      |  -45.17    1.39   64.51 |  683.31  671.89  750.46 |-1337.66-1242.67-1271.69 | 1243.85 1322.20 1341.32
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.09    0.09
norm_rf  |   42.12 |   58.12 |    0.73 |  405.24
norm_vf  |    0.36 |    0.20 |    0.05 |    1.06
gs_f     |    0.96 |    1.22 |    0.02 |    9.31
thrust   |    0.00    0.00   -0.01 |    0.93    0.93    0.93 |   -3.28   -3.40   -3.42 |    3.30    3.46    3.32
norm_thrust |    1.47 |    0.68 |    0.00 |    3.46
fuel     |    4.66 |    0.44 |    2.23 |    5.59
rewards  | -122.26 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4081   0.1451   0.6634  22.0738   6.9309   5.4641
***** Episode 7128, Mean R = -118.1  Std R = 35.3  Min R = -188.6
PolicyLoss: 2.7
Policy_Entropy: 0.0187
Policy_KL: 0.00564
Policy_SD: 0.934
Steps: 1.07e+04
TotalSteps: 2.17e+06
VF_0_ExplainedVarNew: 0.737
VF_0_ExplainedVarOld: 0.675
VF_0_Loss : 0.0391


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0230   0.0069   0.0382   9.0125   2.2441   3.0464
ADVA:  (13891,) (32903,) 0.4221803482965079
ADV1:  0.0 0.00132301873836117 0.19512289521227263 2.8355478032430015 -2.034308740006151
ADVB:  (20140,) (32903,) 0.6121022399173328
ADV2:  0.06459458891055465 0.22992663350564663 0.4968371281739196 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4707   0.1962   0.9189  22.0738   6.9309   5.4641
***** Episode 7159, Mean R = -116.6  Std R = 33.7  Min R = -171.6
PolicyLoss: 2.62
Policy_Entropy: 0.0189
Policy_KL: 0.00774
Policy_SD: 0.933
Steps: 1.11e+04
TotalSteps: 2.18e+06
VF_0_E

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4461   0.2006   0.9339  22.0738   6.9309   5.4641
***** Episode 7376, Mean R = -107.2  Std R = 36.5  Min R = -195.7
PolicyLoss: 2.29
Policy_Entropy: 0.0199
Policy_KL: 0.00867
Policy_SD: 0.927
Steps: 1.14e+04
TotalSteps: 2.26e+06
VF_0_ExplainedVarNew: 0.538
VF_0_ExplainedVarOld: 0.461
VF_0_Loss : 0.037


*** W VIO TYPE CNT:  [130. 116. 155.]
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0498   0.0170   0.0861   9.0125   2.2441   3.0464
ADVA:  (12660,) (33154,) 0.3818543765458165
ADV1:  0.0 0.0016881797194092676 0.2000077406036256 2.216326316154508 -5.100947574122027
ADVB:  (20753,) (33154,) 0.6259576521686674
ADV2:  0.06915047412319301 0.22211737403040127 0.481333018530503 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2722   0.0928   0.4492  22.0738   6.9309   5.4641
***** Episode 7407, Mean R = -121.1  Std R = 34.9  Min R = -207.3
PolicyLoss: 2.44
Policy_Entropy: 0.0202
Policy_KL: 0.00722
Policy_SD: 0.927
Steps:

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3798   0.1407   0.6642  22.0738   6.9309   5.4641
***** Episode 7624, Mean R = -112.2  Std R = 37.9  Min R = -173.9
PolicyLoss: 2.21
Policy_Entropy: 0.022
Policy_KL: 0.00982
Policy_SD: 0.924
Steps: 1.1e+04
TotalSteps: 2.34e+06
VF_0_ExplainedVarNew: 0.687
VF_0_ExplainedVarOld: 0.618
VF_0_Loss : 0.0362


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0187   0.0071   0.0348   9.0125   2.2441   3.0464
ADVA:  (18197,) (32890,) 0.553268470659775
ADV1:  0.005387427961886876 0.0016529061876454233 0.16804880715579748 1.999936548868815 -2.141443395450739
ADVB:  (21244,) (32890,) 0.6459106111280024
ADV2:  0.07887608171839201 0.22496832220267873 0.46887512074938603 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7083   0.2903   1.3715  22.0738   6.9309   5.4641
***** Episode 7655, Mean R = -97.9  Std R = 35.7  Min R = -175.1
PolicyLoss: 2.37
Policy_Entropy: 0.0219
Policy_KL: 0.00868
Policy_SD: 0.926
Steps: 1.1e+04
TotalSteps: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3476   0.1445   0.6593  28.1955   8.9046   7.3693
***** Episode 7872, Mean R = -103.8  Std R = 34.5  Min R = -196.9
PolicyLoss: 2.26
Policy_Entropy: 0.0227
Policy_KL: 0.013
Policy_SD: 0.925
Steps: 1.13e+04
TotalSteps: 2.43e+06
VF_0_ExplainedVarNew: 0.553
VF_0_ExplainedVarOld: 0.475
VF_0_Loss : 0.0332


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0177   0.0057   0.0295   9.0125   2.2441   3.0464
ADVA:  (19779,) (34133,) 0.5794685494975537
ADV1:  0.0006844368122782621 -0.006249667181738933 0.18045135783195687 1.761232531926853 -2.457440649275048
ADVB:  (18308,) (34133,) 0.5363724255119678
ADV2:  0.021739119421303604 0.18991784653768098 0.43519188142540227 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2549   0.0837   0.4314  28.1955   8.9046   7.3693
***** Episode 7903, Mean R = -114.4  Std R = 43.8  Min R = -193.1
PolicyLoss: 2.37
Policy_Entropy: 0.023
Policy_KL: 0.00891
Policy_SD: 0.925
Steps: 1.15e+04
TotalSte

***** Episode 8120, Mean R = -106.8  Std R = 39.6  Min R = -193.9
PolicyLoss: 2.73
Policy_Entropy: 0.0238
Policy_KL: 0.0104
Policy_SD: 0.923
Steps: 1.13e+04
TotalSteps: 2.52e+06
VF_0_ExplainedVarNew: 0.737
VF_0_ExplainedVarOld: 0.663
VF_0_Loss : 0.021


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0489   0.0134   0.0779   9.0125   2.2441   3.0464
ADVA:  (22688,) (33668,) 0.6738743020078413
ADV1:  -0.00011274067401040311 -0.004794032738029969 0.1718361194885964 1.837324766733094 -2.2257959902540674
ADVB:  (11082,) (33668,) 0.3291552809789711
ADV2:  0.0 0.15038885419670295 0.46623681205655837 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.9799   4.4369  16.5556  28.1955   8.9046   7.3693
***** Episode 8151, Mean R = -126.8  Std R = 38.1  Min R = -201.8
PolicyLoss: 2.9
Policy_Entropy: 0.0243
Policy_KL: 0.0215
Policy_SD: 0.922
Steps: 1.1e+04
TotalSteps: 2.53e+06
VF_0_ExplainedVarNew: 0.701
VF_0_ExplainedVarOld: 0.621
VF_0_Loss : 0.0334


ValFun  Gradients: u/sd/Max/C 

w        |   -0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.05   -0.08   -0.05 |    0.06    0.10    0.06
a_f      |   -0.04    0.05 |    0.70    1.79 |   -1.29   -3.14 |    1.53    3.11
w_f      |   -0.00    0.00    0.00 |    0.02    0.03    0.02 |   -0.04   -0.08   -0.04 |    0.04    0.08    0.06
w_rewards |   -0.05 |    0.21 |   -2.28 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |  -18.87 |   24.24 |  -50.00 |    0.00
theta_cv |    0.57 |    0.32 |    0.00 |    3.08
seeker_angles |   -0.01    0.03 |    0.26    0.27 |   -1.00   -1.00 |    1.00    1.00
cs_angles | -0.0149  0.0253 |  0.2613  0.2742 | -0.9992 -0.9994 |  0.9999  0.9995
optical_flow |  0.0001  0.0002 |  0.0215  0.0220 | -0.8428 -0.7963 |  0.7848  0.5593
v_err    | -0.0210 |  0.0746 | -0.5184 |  0.4958
landing_rewards |    0.61 |    2.40 |    0.00 |   10.00
landing_margin |   37.68 |   82.22 |   -0.06 |  508.53
tracking_rewards |  -77.83 |   22.20 | -144.74 |  -36.41
steps    |     359 |  

Update Cnt = 280    ET =   1293.6   Stats:  Mean, Std, Min, Max
r_f      |   -0.16   -1.65   20.76 |  189.82  185.66  219.09 | -479.57 -613.86 -498.95 |  688.62  526.65  457.64
v_f      |    0.01   -0.01   -0.00 |    0.20    0.21    0.19 |   -0.81   -1.05   -0.75 |    0.81    0.70    0.67
r_i      |  -15.37   -0.48   50.09 |  635.71  672.80  788.89 |-1269.00-1263.64-1363.30 | 1266.08 1273.79 1342.91
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.08   -0.09   -0.10 |    0.10    0.09    0.10
norm_rf  |   32.97 |   69.25 |    0.19 |  526.48
norm_vf  |    0.28 |    0.21 |    0.03 |    1.11
gs_f     |    1.07 |    1.99 |    0.02 |   29.09
thrust   |   -0.00   -0.00    0.00 |    0.95    0.93    0.93 |   -3.42   -3.41   -3.42 |    3.45    3.34    3.31
norm_thrust |    1.47 |    0.69 |    0.00 |    3.46
fuel     |    4.21 |    0.47 |    1.93 |    5.13
rewards  | -109.42 |   39.73 | -215.77 |  -39.40
fuel_rewards |  -12.05 |    1.35 |  -14.69 |   -5.53
glideslope_rewards | 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3066   0.1175   0.5565  28.1955   8.9046   7.3693
***** Episode 8988, Mean R = -94.1  Std R = 46.1  Min R = -226.0
PolicyLoss: 2.49
Policy_Entropy: 0.0271
Policy_KL: 0.00793
Policy_SD: 0.912
Steps: 1.07e+04
TotalSteps: 2.84e+06
VF_0_ExplainedVarNew: 0.846
VF_0_ExplainedVarOld: 0.82
VF_0_Loss : 0.0132


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0151   0.0056   0.0321   9.0125   2.2441   3.0464
ADVA:  (17616,) (33438,) 0.5268257670913332
ADV1:  0.0 -0.002372606966736525 0.12842352607086296 2.034647045135498 -2.2611687713262922
ADVB:  (16949,) (33438,) 0.5068784018182906
ADV2:  0.004144939979833721 0.1898375665564309 0.42527801064415005 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3148   0.1079   0.5178  28.1955   8.9046   7.3693
***** Episode 9019, Mean R = -101.0  Std R = 41.0  Min R = -192.4
PolicyLoss: 2.37
Policy_Entropy: 0.0275
Policy_KL: 0.0077
Policy_SD: 0.912
Steps: 1.14e+04
TotalSteps: 2.85e+06
VF_0_

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3218   0.0995   0.4979  28.1955   8.9046   7.3693
***** Episode 9236, Mean R = -73.4  Std R = 23.3  Min R = -138.3
PolicyLoss: 2.54
Policy_Entropy: 0.0279
Policy_KL: 0.00905
Policy_SD: 0.898
Steps: 1.15e+04
TotalSteps: 2.93e+06
VF_0_ExplainedVarNew: 0.386
VF_0_ExplainedVarOld: -0.158
VF_0_Loss : 0.0122


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0212   0.0092   0.0449   9.0125   2.2441   3.0464
ADVA:  (18457,) (34561,) 0.5340412603801973
ADV1:  0.0 -0.005822143300387354 0.14142093660970464 2.0591311684230917 -2.1346721076965354
ADVB:  (17872,) (34561,) 0.5171146668209832
ADV2:  0.01221118145478818 0.1816975569845285 0.4159974401432963 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0728   0.3946   2.1027  28.1955   8.9046   7.3693
***** Episode 9267, Mean R = -93.5  Std R = 42.3  Min R = -180.5
PolicyLoss: 2.23
Policy_Entropy: 0.0279
Policy_KL: 0.0088
Policy_SD: 0.896
Steps: 1.15e+04
TotalSteps: 2.94e+06
VF_0_

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1462   0.0452   0.2365  28.1955   8.9046   7.3693
***** Episode 9484, Mean R = -96.7  Std R = 42.9  Min R = -183.5
PolicyLoss: 1.93
Policy_Entropy: 0.03
Policy_KL: 0.00958
Policy_SD: 0.89
Steps: 1.12e+04
TotalSteps: 3.02e+06
VF_0_ExplainedVarNew: 0.624
VF_0_ExplainedVarOld: 0.508
VF_0_Loss : 0.0277


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0148   0.0086   0.0370   9.0125   2.2441   3.0464
ADVA:  (15289,) (33940,) 0.45047142015321157
ADV1:  0.0 0.0015440487437190706 0.15123465889509805 2.303055254618327 -2.1774360263347647
ADVB:  (19355,) (33940,) 0.5702710665880967
ADV2:  0.033504784175515424 0.1819786807469874 0.42521013418724574 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8162   0.3688   1.6673  28.1955   8.9046   7.3693
***** Episode 9515, Mean R = -91.6  Std R = 36.6  Min R = -176.2
PolicyLoss: 1.96
Policy_Entropy: 0.0303
Policy_KL: 0.00935
Policy_SD: 0.891
Steps: 1.14e+04
TotalSteps: 3.03e+06
VF_0_E

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8739   0.4733   2.0871  28.1955   8.9046   7.3693
***** Episode 9732, Mean R = -76.9  Std R = 30.1  Min R = -153.5
PolicyLoss: 2.05
Policy_Entropy: 0.032
Policy_KL: 0.0108
Policy_SD: 0.888
Steps: 1.14e+04
TotalSteps: 3.11e+06
VF_0_ExplainedVarNew: 0.622
VF_0_ExplainedVarOld: 0.578
VF_0_Loss : 0.0195


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0158   0.0057   0.0286   9.0125   2.2441   3.0464
ADVA:  (21076,) (34042,) 0.6191175606603607
ADV1:  0.0 -0.007361464299675166 0.12749752670058329 1.8489470458733663 -2.073655019463065
ADVB:  (14019,) (34042,) 0.4118148169907761
ADV2:  0.0 0.13621292106947008 0.3488048263418494 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3627   0.1547   0.7294  28.1955   8.9046   7.3693
***** Episode 9763, Mean R = -84.0  Std R = 38.9  Min R = -173.9
PolicyLoss: 1.99
Policy_Entropy: 0.0316
Policy_KL: 0.0102
Policy_SD: 0.889
Steps: 1.14e+04
TotalSteps: 3.12e+06
VF_0_ExplainedVarNew: 0.

***** Episode 9980, Mean R = -90.7  Std R = 38.4  Min R = -164.9
PolicyLoss: 2.03
Policy_Entropy: 0.0332
Policy_KL: 0.00658
Policy_SD: 0.882
Steps: 1.14e+04
TotalSteps: 3.2e+06
VF_0_ExplainedVarNew: 0.703
VF_0_ExplainedVarOld: 0.661
VF_0_Loss : 0.0181


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0154   0.0081   0.0339   9.0125   2.2441   3.0464
ADVA:  (18014,) (34131,) 0.5277899856435498
ADV1:  -0.0006869564078024054 -0.006130318215287113 0.12780109621362834 1.9458817513783773 -2.377271462082865
ADVB:  (15423,) (34131,) 0.45187659312648326
ADV2:  0.0 0.1723970928025613 0.42603319983365817 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6623   0.3286   1.3355  28.1955   8.9046   7.3693
***** Episode 10011, Mean R = -77.6  Std R = 26.3  Min R = -141.7
PolicyLoss: 2.31
Policy_Entropy: 0.0332
Policy_KL: 0.00773
Policy_SD: 0.88
Steps: 1.13e+04
TotalSteps: 3.21e+06
VF_0_ExplainedVarNew: 0.569
VF_0_ExplainedVarOld: 0.371
VF_0_Loss : 0.014


ValFun  Gradients: u/sd/Max/C

attitude |   -0.01   -0.02   -0.01 |    1.22    0.63    1.87 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.07   -0.08   -0.06 |    0.05    0.07    0.06
a_f      |   -0.01    0.01 |    0.66    1.88 |   -1.44   -3.13 |    1.47    3.14
w_f      |    0.00    0.00    0.00 |    0.02    0.03    0.02 |   -0.04   -0.08   -0.06 |    0.04    0.07    0.06
w_rewards |   -0.02 |    0.08 |   -0.79 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |   -6.61 |   16.94 |  -50.00 |    0.00
theta_cv |    0.49 |    0.28 |    0.00 |    2.95
seeker_angles |    0.01    0.01 |    0.19    0.21 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0119  0.0090 |  0.1950  0.2137 | -0.9998 -0.9980 |  0.9971  0.9999
optical_flow | -0.0001  0.0003 |  0.0214  0.0244 | -0.9949 -1.0553 |  0.6716  0.8990
v_err    | -0.0118 |  0.0724 | -0.5173 |  0.5588
landing_rewards |    1.03 |    3.04 |    0.00 |   10.00
landing_margin |   11

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4795   0.1936   0.8497  28.1955   8.9046   7.8382
Update Cnt = 340    ET =   1446.6   Stats:  Mean, Std, Min, Max
r_f      |    3.37   20.37    1.88 |  186.07  171.29  205.38 | -381.45 -396.53 -392.93 |  392.48  392.68  408.07
v_f      |   -0.01   -0.00   -0.00 |    0.11    0.10    0.11 |   -0.57   -0.44   -0.47 |    0.43    0.62    0.67
r_i      |   24.73   50.76    7.21 |  679.86  642.27  776.15 |-1268.18-1281.71-1289.99 | 1286.90 1321.09 1274.58
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.10    0.09
norm_rf  |    6.01 |   18.48 |    0.18 |  235.52
norm_vf  |    0.16 |    0.10 |    0.02 |    0.86
gs_f     |    1.04 |    1.19 |    0.03 |    9.69
thrust   |   -0.00   -0.00    0.00 |    0.92    0.90    0.90 |   -3.39   -3.43   -3.45 |    3.46    3.37    3.44
norm_thrust |    1.42 |    0.68 |    0.00 |    3.46
fuel     |    3.65 |    0.30 |    2.92 |    4.56
rewards  |  -70.31 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3193   0.1222   0.6362  28.1955   8.9046   7.8382
***** Episode 10848, Mean R = -65.2  Std R = 26.9  Min R = -150.5
PolicyLoss: 2.2
Policy_Entropy: 0.0372
Policy_KL: 0.00855
Policy_SD: 0.842
Steps: 1.15e+04
TotalSteps: 3.52e+06
VF_0_ExplainedVarNew: 0.52
VF_0_ExplainedVarOld: 0.421
VF_0_Loss : 0.0171


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0130   0.0072   0.0288   9.0125   2.2441   3.0464
ADVA:  (19163,) (34172,) 0.560780756174646
ADV1:  0.0 0.00289703294593041 0.09374643600979926 2.0720865996243734 -2.1527909362316153
ADVB:  (13572,) (34172,) 0.3971672714503102
ADV2:  0.0 0.1889482000057913 0.44232408620014324 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5805   0.2208   1.1159  28.1955   8.9046   7.8382
***** Episode 10879, Mean R = -60.3  Std R = 17.7  Min R = -116.8
PolicyLoss: 2.78
Policy_Entropy: 0.0373
Policy_KL: 0.00935
Policy_SD: 0.844
Steps: 1.14e+04
TotalSteps: 3.53e+06
VF_0_ExplainedVarNew: 0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8472   0.3908   1.6689  28.1955   8.9046   7.8382
***** Episode 11096, Mean R = -62.3  Std R = 27.8  Min R = -175.5
PolicyLoss: 2.63
Policy_Entropy: 0.0388
Policy_KL: 0.0199
Policy_SD: 0.843
Steps: 1.14e+04
TotalSteps: 3.61e+06
VF_0_ExplainedVarNew: 0.84
VF_0_ExplainedVarOld: 0.68
VF_0_Loss : 0.0123


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0083   0.0040   0.0177   9.0125   2.2441   3.0464
ADVA:  (16684,) (34266,) 0.486896632230199
ADV1:  0.0 0.0017828590826007291 0.07971473804431121 1.4048092114441864 -1.412775210930371
ADVB:  (18661,) (34266,) 0.5445923072433316
ADV2:  0.038623850131727915 0.23256334831244968 0.43487542321645534 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6424   0.2843   1.3546  28.1955   8.9046   7.8382
***** Episode 11127, Mean R = -73.3  Std R = 37.0  Min R = -191.8
PolicyLoss: 2.52
Policy_Entropy: 0.0392
Policy_KL: 0.00679
Policy_SD: 0.844
Steps: 1.14e+04
TotalSteps: 3.62e+06
VF_0_

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5727   0.2878   1.2949  28.1955   8.9046   7.8382
***** Episode 11344, Mean R = -53.1  Std R = 19.2  Min R = -134.4
PolicyLoss: 2.77
Policy_Entropy: 0.0388
Policy_KL: 0.00704
Policy_SD: 0.836
Steps: 1.15e+04
TotalSteps: 3.7e+06
VF_0_ExplainedVarNew: 0.795
VF_0_ExplainedVarOld: 0.757
VF_0_Loss : 0.0104


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0139   0.0052   0.0239   9.0125   2.2441   3.0464
ADVA:  (18105,) (34286,) 0.5280580995158374
ADV1:  0.0 -0.0010586063004101806 0.060685928864063536 1.7727830891918108 -1.7975340686579764
ADVB:  (17186,) (34286,) 0.5012541562153648
ADV2:  0.0013985949789912162 0.18494367959059443 0.3596608076954592 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1869   0.0660   0.3193  28.1955   8.9046   7.8382
***** Episode 11375, Mean R = -60.7  Std R = 30.1  Min R = -156.9
PolicyLoss: 2.18
Policy_Entropy: 0.0387
Policy_KL: 0.00609
Policy_SD: 0.835
Steps: 1.13e+04
TotalSteps: 3.72e+06

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.4285   1.8994   6.2516  34.6670  14.5135  10.6446
***** Episode 11592, Mean R = -52.9  Std R = 18.6  Min R = -116.8
PolicyLoss: 3.34
Policy_Entropy: 0.0414
Policy_KL: 0.00719
Policy_SD: 0.824
Steps: 1.15e+04
TotalSteps: 3.8e+06
VF_0_ExplainedVarNew: 0.867
VF_0_ExplainedVarOld: 0.74
VF_0_Loss : 0.0106


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0009   0.0051   9.0125   2.2441   3.0464
ADVA:  (14861,) (34524,) 0.4304541768045418
ADV1:  0.000396748682972535 0.0006064521417450781 0.045743691747313846 0.5864738475424984 -1.3423738827484386
ADVB:  (22352,) (34524,) 0.6474336693314795
ADV2:  0.2124976335373395 0.35583751930592256 0.4593470229701532 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1481   0.3543   1.9614  34.6670  14.5135  10.6446
***** Episode 11623, Mean R = -53.9  Std R = 21.7  Min R = -138.6
PolicyL

***** Episode 11840, Mean R = -50.3  Std R = 10.7  Min R = -72.2
PolicyLoss: 3.62
Policy_Entropy: 0.0428
Policy_KL: 0.00515
Policy_SD: 0.813
Steps: 1.17e+04
TotalSteps: 3.89e+06
VF_0_ExplainedVarNew: 0.797
VF_0_ExplainedVarOld: 0.782
VF_0_Loss : 0.0061


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0041   0.0021   0.0092   9.0125   2.2441   3.0464
ADVA:  (16894,) (34611,) 0.4881107162462801
ADV1:  0.0 -0.00022208997018175115 0.032373150181276004 1.4528293895721436 -0.5700491167608468
ADVB:  (16367,) (34611,) 0.47288434312790734
ADV2:  0.0 0.2932199854302594 0.5024254560720218 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2322   0.0719   0.3937  34.6670  14.5135  10.6446
***** Episode 11871, Mean R = -47.4  Std R = 14.3  Min R = -84.0
PolicyLoss: 3.59
Policy_Entropy: 0.043
Policy_KL: 0.00423
Policy_SD: 0.813
Steps: 1.15e+04
TotalSteps: 3.9e+06
VF_0_ExplainedVarNew: 0.816
V

w        |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.05   -0.05   -0.05 |    0.05    0.06    0.05
a_f      |    0.04   -0.05 |    0.62    1.76 |   -1.39   -3.14 |    1.42    3.09
w_f      |    0.00    0.00   -0.00 |    0.02    0.02    0.02 |   -0.05   -0.05   -0.04 |    0.04    0.06    0.04
w_rewards |   -0.00 |    0.01 |   -0.19 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |   -0.65 |    5.64 |  -50.00 |    0.00
theta_cv |    0.42 |    0.26 |    0.00 |    2.26
seeker_angles |    0.01    0.01 |    0.14    0.13 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0076  0.0107 |  0.1388  0.1343 | -0.9980 -0.9987 |  0.9969  0.9962
optical_flow |  0.0000  0.0002 |  0.0227  0.0261 | -0.9329 -1.1251 |  0.8590  1.2360
v_err    | -0.0085 |  0.0671 | -0.4995 |  0.4183
landing_rewards |    2.94 |    4.55 |    0.00 |   10.00
landing_margin |    0.63 |    5.57 |   -0.06 |   83.94
tracking_rewards |  -42.40 |   15.31 | -126.93 |  -20.57
steps    |     370 |  

Update Cnt = 400    ET =   1393.7   Stats:  Mean, Std, Min, Max
r_f      |    6.76  -15.23   -0.61 |  191.12  168.76  205.17 | -382.90 -405.55 -381.98 |  390.90  396.17  591.59
v_f      |    0.00   -0.00   -0.00 |    0.08    0.08    0.09 |   -0.17   -0.44   -0.64 |    0.63    0.35    0.22
r_i      |    6.10  -52.92    8.66 |  695.13  661.45  747.39 |-1303.47-1362.85-1254.09 | 1233.12 1316.23 1318.16
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.09    0.08    0.09
norm_rf  |    3.90 |   26.57 |    0.11 |  421.64
norm_vf  |    0.12 |    0.08 |    0.03 |    0.76
gs_f     |    1.07 |    1.26 |    0.01 |   12.11
thrust   |    0.00   -0.00    0.00 |    0.87    0.85    0.85 |   -3.39   -3.46   -3.45 |    3.28    3.42    3.31
norm_thrust |    1.32 |    0.69 |    0.00 |    3.46
fuel     |    2.98 |    0.35 |    2.28 |    4.39
rewards  |  -50.69 |   23.99 | -171.37 |  -18.93
fuel_rewards |   -8.52 |    0.99 |  -12.55 |   -6.53
glideslope_rewards | 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2697   0.1072   0.4910  34.6670  14.5135  10.6446
***** Episode 12708, Mean R = -50.9  Std R = 26.0  Min R = -158.2
PolicyLoss: 3.42
Policy_Entropy: 0.0502
Policy_KL: 0.00422
Policy_SD: 0.787
Steps: 1.15e+04
TotalSteps: 4.21e+06
VF_0_ExplainedVarNew: 0.895
VF_0_ExplainedVarOld: 0.882
VF_0_Loss : 0.00631


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0010   0.0050   9.0125   2.2441   3.0464
ADVA:  (17441,) (34943,) 0.4991271499298858
ADV1:  0.0 0.00045542752044769753 0.029844320542489283 0.7532143274943034 -0.4493107509342035
ADVB:  (17995,) (34943,) 0.5149815413673697
ADV2:  0.028224083017324552 0.3185055118691114 0.4928006521305078 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2572   0.6100   2.1347  34.6670  14.5135  10.6446
***** Episode 12739, Mean R = -42.9  Std R = 13.3  Min R = -80.8
PolicyLoss: 3.44
Policy_Entropy: 0.0496
Policy_KL: 0.00407
Policy_SD: 0.776
Steps: 1.17e+04
TotalSteps: 4.22e+06


Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5002   0.1972   0.9417  34.6670  14.5135  10.6446
***** Episode 12956, Mean R = -45.2  Std R = 14.8  Min R = -77.3
PolicyLoss: 3.8
Policy_Entropy: 0.0524
Policy_KL: 0.00397
Policy_SD: 0.772
Steps: 1.18e+04
TotalSteps: 4.3e+06
VF_0_ExplainedVarNew: 0.849
VF_0_ExplainedVarOld: 0.835
VF_0_Loss : 0.00632


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0017   0.0072   9.0125   2.2441   3.0464
ADVA:  (16179,) (35073,) 0.46129501325806177
ADV1:  0.0 0.0015590075291040304 0.024767119918686856 0.333364531993866 -0.2752655728368656
ADVB:  (19513,) (35073,) 0.5563538904570468
ADV2:  0.12226160222260946 0.42286529130268297 0.5897316981663845 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5274   0.2267   1.0683  34.6670  14.5135  10.6446
***** Episode 12987, Mean R = -37.5  Std R = 11.9  Min R = -66.0
PolicyLoss: 4.22
Policy_Entropy: 0.0523
Policy_KL: 0.00438
Policy_SD: 0.771
Steps: 1.16e+04
TotalSteps: 4.31e+06
VF_0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1981   0.0600   0.3253  34.6670  14.5135  10.6446
***** Episode 13204, Mean R = -38.0  Std R = 13.9  Min R = -80.3
PolicyLoss: 3.04
Policy_Entropy: 0.0553
Policy_KL: 0.00542
Policy_SD: 0.761
Steps: 1.17e+04
TotalSteps: 4.4e+06
VF_0_ExplainedVarNew: 0.811
VF_0_ExplainedVarOld: 0.784
VF_0_Loss : 0.00423


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0009   0.0036   9.0125   2.2441   3.0464
ADVA:  (16948,) (35109,) 0.4827252271497337
ADV1:  0.0 0.0009686643554029481 0.02330063156954631 0.3093497484119263 -0.2560048781907
ADVB:  (18833,) (35109,) 0.5364151642029109
ADV2:  0.085050114453867 0.4093807087847662 0.5771844320015234 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3327   0.1336   0.6237  34.6670  14.5135  10.6446
***** Episode 13235, Mean R = -38.0  Std R = 10.5  Min R = -68.7
PolicyLoss: 4.15
Policy_Entrop

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5586   0.2259   1.0023  34.6670  14.5135  10.6446
***** Episode 13452, Mean R = -38.5  Std R = 14.3  Min R = -72.7
PolicyLoss: 4.01
Policy_Entropy: 0.0596
Policy_KL: 0.00698
Policy_SD: 0.738
Steps: 1.15e+04
TotalSteps: 4.49e+06
VF_0_ExplainedVarNew: 0.864
VF_0_ExplainedVarOld: 0.856
VF_0_Loss : 0.00448


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0014   0.0050   9.0125   2.2441   3.0464
ADVA:  (16984,) (34668,) 0.48990423445252107
ADV1:  0.0 0.0005638589830854963 0.02124082218557597 0.23274162920175087 -0.23393382957961228
ADVB:  (16721,) (34668,) 0.48231798776970114
ADV2:  0.0 0.3652879155826794 0.5652018325915046 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3082   0.1156   0.5749  34.6670  14.5135  10.6446
***** Episode 13483, Mean R = -36.5  Std R = 10.5  Min R = -59.8
PolicyLoss: 4.04
Policy_Entropy: 0.0

***** Episode 13700, Mean R = -44.3  Std R = 15.0  Min R = -89.9
PolicyLoss: 3.93
Policy_Entropy: 0.0627
Policy_KL: 0.00608
Policy_SD: 0.743
Steps: 1.15e+04
TotalSteps: 4.58e+06
VF_0_ExplainedVarNew: 0.85
VF_0_ExplainedVarOld: 0.841
VF_0_Loss : 0.00476


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0001   0.0008   9.0125   2.2441   3.0464
ADVA:  (16500,) (34669,) 0.47592950474487294
ADV1:  0.0 0.0007736171806693815 0.021709960706535202 0.21868098971467947 -0.25594631409233026
ADVB:  (17571,) (34669,) 0.5068216562346766
ADV2:  0.014184356002544788 0.3928088053551481 0.5711111674074875 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.6623   1.7969   6.4570  34.6670  14.5135  10.6446
***** Episode 13731, Mean R = -39.7  Std R = 12.0  Min R = -67.6
PolicyLoss: 4.09
Policy_Entropy: 0.0629
Policy_KL: 0.00564
Policy_SD: 0.742
Steps: 1.16e+04
TotalSteps: 4.59e+06
VF_0_ExplainedVarNew: 0.854
VF_0_ExplainedVarOld: 0.85
VF_0_Loss : 0.00451


Dynamics: Max Disturbance 

attitude |   -0.09    0.03    0.01 |    1.21    0.66    1.84 |   -3.14   -1.56   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.05 |    0.07    0.06    0.05
a_f      |    0.03   -0.11 |    0.66    1.82 |   -1.38   -3.14 |    1.50    3.10
w_f      |    0.00    0.00   -0.00 |    0.02    0.02    0.01 |   -0.05   -0.05   -0.04 |    0.07    0.06    0.05
w_rewards |   -0.00 |    0.03 |   -0.39 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.39 |    0.27 |    0.00 |    2.24
seeker_angles |    0.01   -0.00 |    0.12    0.11 |   -0.99   -1.00 |    1.00    1.00
cs_angles |  0.0060 -0.0009 |  0.1158  0.1128 | -0.9888 -0.9995 |  0.9973  0.9974
optical_flow | -0.0000  0.0003 |  0.0270  0.0277 | -1.0042 -1.4169 |  0.9920  1.4611
v_err    | -0.0112 |  0.0641 | -0.5007 |  0.2282
landing_rewards |    4.32 |    4.95 |    0.00 |   10.00
landing_margin |    0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4879   0.1794   0.9019  34.6670  14.5135  10.6446
Update Cnt = 460    ET =   1350.7   Stats:  Mean, Std, Min, Max
r_f      |    4.69   30.54    4.95 |  188.74  170.53  194.52 | -383.82 -374.37 -392.98 |  389.04  386.86  369.32
v_f      |   -0.00   -0.01   -0.00 |    0.07    0.06    0.07 |   -0.15   -0.15   -0.31 |    0.33    0.21    0.16
r_i      |   36.10  114.26   11.06 |  694.03  655.87  748.80 |-1331.05-1261.85-1327.35 | 1298.37 1281.86 1236.59
v_i      |   -0.00   -0.01   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    1.03 |    1.90 |    0.14 |   33.15
norm_vf  |    0.11 |    0.04 |    0.03 |    0.49
gs_f     |    1.19 |    1.73 |    0.01 |   19.95
thrust   |    0.00   -0.00   -0.00 |    0.81    0.80    0.81 |   -3.45   -3.46   -3.45 |    3.43    3.43    3.32
norm_thrust |    1.20 |    0.71 |    0.00 |    3.46
fuel     |    2.45 |    0.35 |    1.81 |    5.06
rewards  |  -37.81 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2220   0.0778   0.3851  34.6670  14.5135  10.6446
***** Episode 14568, Mean R = -34.8  Std R = 11.2  Min R = -64.3
PolicyLoss: 3.76
Policy_Entropy: 0.0749
Policy_KL: 0.00471
Policy_SD: 0.699
Steps: 1.16e+04
TotalSteps: 4.91e+06
VF_0_ExplainedVarNew: 0.845
VF_0_ExplainedVarOld: 0.839
VF_0_Loss : 0.00274


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0071   0.0037   0.0161   9.0125   2.2441   3.0464
ADVA:  (17522,) (34690,) 0.5051023349668492
ADV1:  0.001321445585791 0.002279638452566015 0.019638127695261937 0.1994571879056547 -0.1410245320711448
ADVB:  (19981,) (34690,) 0.575987316229461
ADV2:  0.21163086310317705 0.4754627008446571 0.6040918335314558 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3868   0.1590   0.7533  34.6670  14.5135  10.6446
***** Episode 14599, Mean R = -35.4  Std R = 13.1  Min R = -66.1
PolicyLoss: 4.11
Policy_Entropy: 0.0758
Policy_KL: 0.00434
Policy_SD: 0.697
Steps: 1.15e+04
TotalSteps: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.9682   4.0566  15.6070  34.6670  14.5135  10.6446
***** Episode 14816, Mean R = -39.3  Std R = 35.8  Min R = -188.5
PolicyLoss: 1.91
Policy_Entropy: 0.078
Policy_KL: 0.0067
Policy_SD: 0.683
Steps: 1.16e+04
TotalSteps: 5e+06
VF_0_ExplainedVarNew: 0.815
VF_0_ExplainedVarOld: 0.605
VF_0_Loss : 0.00712


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0011   0.0055   9.0125   2.2441   3.0464
ADVA:  (18759,) (34695,) 0.5406830955469087
ADV1:  0.0 7.242132689953669e-05 0.05511837106945604 1.0037028199616247 -1.7576556275407986
ADVB:  (17333,) (34695,) 0.49958207234471824
ADV2:  0.0 0.21431714666939675 0.3956489589047833 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6982   0.8070   3.1713  34.6670  14.5135  10.6446
***** Episode 14847, Mean R = -36.7  Std R = 11.6  Min R = -62.3
PolicyLoss: 2.11
Policy_Entropy: 0.0784
Policy_KL: 0.00617
Policy_SD: 0.687
Steps: 1.16e+04
TotalSteps: 5.01e+06
VF_0_ExplainedVarNew: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8812   0.4575   1.9059  34.6670  14.5135  10.6446
***** Episode 15064, Mean R = -32.0  Std R = 10.9  Min R = -56.5
PolicyLoss: 3.75
Policy_Entropy: 0.0798
Policy_KL: 0.00436
Policy_SD: 0.687
Steps: 1.17e+04
TotalSteps: 5.09e+06
VF_0_ExplainedVarNew: 0.853
VF_0_ExplainedVarOld: 0.841
VF_0_Loss : 0.00162


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0044   0.0023   0.0092   9.0125   2.2441   3.0464
ADVA:  (18119,) (35038,) 0.5171242650836235
ADV1:  0.0 -0.0005089456266841425 0.020954997505651905 0.3656116751937121 -0.18425404542404927
ADVB:  (17826,) (35038,) 0.508761915634454
ADV2:  0.022821239781153378 0.38711473464738755 0.5505854255918454 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7994   0.2903   1.5190  34.6670  14.5135  10.6446
***** Episode 15095, Mean R = -30.0  Std R = 10.1  Min R = -55.6
PolicyLoss: 3.7
Poli

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.0528   1.1108   3.7903  34.6670  14.5135  10.6446
***** Episode 15312, Mean R = -31.0  Std R = 10.1  Min R = -51.5
PolicyLoss: 3.49
Policy_Entropy: 0.0838
Policy_KL: 0.00614
Policy_SD: 0.675
Steps: 1.16e+04
TotalSteps: 5.19e+06
VF_0_ExplainedVarNew: 0.86
VF_0_ExplainedVarOld: 0.854
VF_0_Loss : 0.00107


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0045   0.0025   0.0096   9.0125   2.2441   3.0464
ADVA:  (17471,) (34953,) 0.4998426458386977
ADV1:  0.0 0.0009540847504407422 0.019240424989729048 0.22703776861981245 -0.20142597025528858
ADVB:  (18846,) (34953,) 0.5391811861642778
ADV2:  0.10463355320327697 0.42716844428434353 0.585225574656694 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7844   0.3657   1.4066  34.6670  14.5135  10.6446
***** Episode 15343, Mean R = -33.7  Std R = 14.4  Min R = -68.5
PolicyLoss: 3.81
Policy_Entropy: 0.0843
Policy_KL: 0.0066
Policy_SD: 0.675
Steps: 1.17e+04
TotalSteps: 5.2e+06
VF_0

***** Episode 15560, Mean R = -29.5  Std R = 8.5  Min R = -46.9
PolicyLoss: 3.53
Policy_Entropy: 0.0878
Policy_KL: 0.00392
Policy_SD: 0.658
Steps: 1.15e+04
TotalSteps: 5.28e+06
VF_0_ExplainedVarNew: 0.887
VF_0_ExplainedVarOld: 0.882
VF_0_Loss : 0.000874


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0050   0.0029   0.0111   9.0125   2.2441   3.0464
ADVA:  (18490,) (34949,) 0.5290566253683939
ADV1:  0.001969556826519326 0.0014355097400065118 0.016385065897137823 0.14809146475740073 -0.14287957906240573
ADVB:  (20965,) (34949,) 0.599874102263298
ADV2:  0.2578275806308209 0.4777570634049652 0.5790110038660625 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5464   0.2127   0.9150  34.6670  14.5135  10.6446
***** Episode 15591, Mean R = -28.6  Std R = 8.9  Min R = -53.3
PolicyLoss: 3.78
Policy_Entropy: 0.0879
Policy_KL: 0.00381
Policy_SD: 0.661
Steps: 1.16e+04
TotalSteps: 5.29e+06
VF_0_ExplainedVarNew: 0.917
VF_0_ExplainedVarOld: 0.912
VF_0_Loss : 0.000881


Dynamics: M

w        |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.05   -0.05   -0.05 |    0.07    0.05    0.05
a_f      |    0.01   -0.17 |    0.70    1.82 |   -1.45   -3.12 |    1.49    3.14
w_f      |    0.00    0.01   -0.00 |    0.02    0.02    0.01 |   -0.05   -0.04   -0.04 |    0.07    0.05    0.04
w_rewards |   -0.00 |    0.02 |   -0.32 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |   -0.16 |    2.84 |  -50.00 |    0.00
theta_cv |    0.36 |    0.27 |    0.00 |    1.60
seeker_angles |   -0.00    0.00 |    0.10    0.11 |   -0.99   -1.00 |    1.00    0.99
cs_angles | -0.0025  0.0002 |  0.1044  0.1133 | -0.9937 -0.9982 |  0.9973  0.9897
optical_flow | -0.0002  0.0002 |  0.0238  0.0299 | -0.9585 -1.0972 |  0.8662  1.3969
v_err    | -0.0114 |  0.0618 | -0.4960 |  0.3874
landing_rewards |    6.45 |    4.78 |    0.00 |   10.00
landing_margin |    0.37 |    6.69 |   -0.07 |  118.04
tracking_rewards |  -30.83 |    9.81 | -113.81 |  -15.78
steps    |     376 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5110   0.2234   1.0352  34.6670  14.5135  10.6446
Update Cnt = 520    ET =   1332.4   Stats:  Mean, Std, Min, Max
r_f      |   13.62    1.44   -4.00 |  181.75  178.20  204.85 | -397.29 -397.51 -391.52 |  376.54  391.54  387.65
v_f      |    0.00    0.00   -0.00 |    0.05    0.05    0.06 |   -0.13   -0.12   -0.15 |    0.15    0.14    0.14
r_i      |   68.10  -22.37  -10.03 |  689.26  661.65  757.61 |-1300.75-1286.51-1324.27 | 1328.04 1342.75 1293.43
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.66 |    0.33 |    0.01 |    2.05
norm_vf  |    0.09 |    0.03 |    0.03 |    0.18
gs_f     |    1.12 |    1.48 |    0.01 |   13.39
thrust   |    0.00   -0.00   -0.00 |    0.76    0.74    0.74 |   -3.43   -3.44   -3.46 |    3.43    3.45    3.46
norm_thrust |    1.07 |    0.72 |    0.00 |    3.46
fuel     |    2.06 |    0.29 |    1.59 |    3.34
rewards  |  -29.17 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5048   0.2239   0.9856  34.6670  14.5135  10.6446
***** Episode 16428, Mean R = -26.8  Std R = 8.3  Min R = -44.8
PolicyLoss: 3.31
Policy_Entropy: 0.0994
Policy_KL: 0.00406
Policy_SD: 0.634
Steps: 1.16e+04
TotalSteps: 5.61e+06
VF_0_ExplainedVarNew: 0.891
VF_0_ExplainedVarOld: 0.887
VF_0_Loss : 0.000913


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0028   9.0125   2.2441   3.0464
ADVA:  (18775,) (35250,) 0.5326241134751774
ADV1:  0.0 -0.0011516522026321062 0.015724706117153337 0.10658471922966306 -0.15773639082365165
ADVB:  (18210,) (35250,) 0.516595744680851
ADV2:  0.049373748932528924 0.36400933407334957 0.5119761125439312 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5610   0.3092   1.1873  34.6670  14.5135  10.6446
***** Episode 16459, Mean R = -26.9  Std R = 9.4  Min R = -59.6
PolicyLoss: 3.19
Policy_Entropy: 0.099
Policy_KL: 0.00476
Policy_SD: 0.629
Steps: 1.19e+04
TotalSteps: 5.62e+06
V

ADVA:  (19007,) (35090,) 0.5416642918210316
ADV1:  0.000252541003395685 -0.0007396930881212454 0.01714133818535183 0.10995875684567419 -0.13826006928059958
ADVB:  (19516,) (35090,) 0.5561698489598176
ADV2:  0.134801315071167 0.39013536308416835 0.5254347255380455 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8914   0.4294   1.9613  34.6670  14.5135  10.6446
***** Episode 16676, Mean R = -25.7  Std R = 9.3  Min R = -50.2
PolicyLoss: 3.17
Policy_Entropy: 0.101
Policy_KL: 0.00653
Policy_SD: 0.634
Steps: 1.18e+04
TotalSteps: 5.7e+06
VF_0_ExplainedVarNew: 0.911
VF_0_ExplainedVarOld: 0.906
VF_0_Loss : 0.0017


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0018   0.0066   9.0125   2.2441   3.0464
ADVA:  (18682,) (34970,) 0.5342293394338004
ADV1:  0.0005846867299090473 7.513243145752883e-05 0.01657580809694557 0.10332842321270741 -0.13778291817605304
ADVB:  (20016,) (34970,) 0.5723763225621962
ADV2:  0.16991747158257592 0.41649402705562877 0.5457057898120893 3.0 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0012   0.0043   9.0125   2.2441   3.0464
ADVA:  (18996,) (35117,) 0.5409345900845745
ADV1:  0.0003296269754986675 0.0003330660038911004 0.016313311846766234 0.19535344916740444 -0.11759057369816862
ADVB:  (18790,) (35117,) 0.5350684853489763
ADV2:  0.09239847028178957 0.40132644316725713 0.5430193136410599 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1789   0.6320   2.5141  34.6670  14.5135  10.6446
***** Episode 16924, Mean R = -27.3  Std R = 10.7  Min R = -49.2
PolicyLoss: 3.34
Policy_Entropy: 0.104
Policy_KL: 0.006
Policy_SD: 0.623
Steps: 1.17e+04
TotalSteps: 5.79e+06
VF_0_ExplainedVarNew: 0.902
VF_0_ExplainedVarOld: 0.897
VF_0_Loss : 0.000855


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0016   0.0063   9.0125   2.2441   3.0464
ADVA:  (19316,) (34931,) 0.5529758667086542
ADV1:  0.0 9.546066955379

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0006   0.0025   9.0125   2.2441   3.0464
ADVA:  (19398,) (34728,) 0.5585694540428473
ADV1:  0.0 -0.001131439127202478 0.042609992528150925 0.19718047637868502 -2.0662604586283386
ADVB:  (17123,) (34728,) 0.49306035475696847
ADV2:  0.0 0.21688009755454016 0.35558315089958836 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2430   0.1049   0.4812  34.6670  14.5135  10.6446
***** Episode 17172, Mean R = -27.3  Std R = 7.9  Min R = -46.4
PolicyLoss: 1.96
Policy_Entropy: 0.106
Policy_KL: 0.00377
Policy_SD: 0.624
Steps: 1.17e+04
TotalSteps: 5.89e+06
VF_0_ExplainedVarNew: 0.861
VF_0_ExplainedVarOld: 0.842
VF_0_Loss : 0.000799


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0079   0.0047   0.0170   9.0125   2.2441   3.0464
ADVA:  (18103,) (35210,) 0.5141437091735303
ADV1:  0.0012978316050046186 0.0024054701897674026 0.021253109795869987 0.19718047637868502 -0.1756151601606822
ADVB:  (19816,) (35210,) 0.56279466060

seeker_angles |    0.00   -0.00 |    0.10    0.10 |   -0.99   -0.99 |    1.00    1.00
cs_angles |  0.0009 -0.0017 |  0.1009  0.1045 | -0.9923 -0.9931 |  0.9971  0.9983
optical_flow | -0.0001 -0.0000 |  0.0244  0.0282 | -1.1816 -0.9811 |  0.9006  1.0284
v_err    | -0.0109 |  0.0613 | -0.4998 |  0.1574
landing_rewards |    7.13 |    4.52 |    0.00 |   10.00
landing_margin |   -0.00 |    0.20 |   -0.07 |    3.53
tracking_rewards |  -28.20 |    8.84 |  -91.59 |  -13.99
steps    |     379 |      20 |     329 |     415
***** Episode 17420, Mean R = -24.8  Std R = 7.5  Min R = -38.9
PolicyLoss: 3.34
Policy_Entropy: 0.108
Policy_KL: 0.00571
Policy_SD: 0.624
Steps: 1.18e+04
TotalSteps: 5.98e+06
VF_0_ExplainedVarNew: 0.902
VF_0_ExplainedVarOld: 0.895
VF_0_Loss : 0.000243


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0015   0.0055   9.0125   2.2441   3.0464
ADVA:  (18707,) (35498,) 0.5269874359118824
ADV1:  0.002905900587902816 0.002140748669990087 0.015174457141764422 0.1175523

thrust   |    0.00    0.00    0.01 |    0.73    0.73    0.72 |   -3.46   -3.36   -3.43 |    3.42    3.36    3.43
norm_thrust |    1.02 |    0.74 |    0.00 |    3.46
fuel     |    1.91 |    0.31 |    1.28 |    3.26
rewards  |  -25.68 |   13.66 | -161.00 |   -8.60
fuel_rewards |   -5.47 |    0.88 |   -9.32 |   -3.67
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    3.12 |   13.83 |    0.00 |  151.46
norm_af  |    1.72 |    0.85 |    0.09 |    3.32
norm_wf  |    0.02 |    0.01 |    0.00 |    0.08
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |   -0.02    0.03   -0.09 |    1.11    0.69    1.80 |   -3.14   -1.56   -3.14 |    3.14    1.56    3.14
w        |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.05   -0.05   -0.05 |    0.06    0.05    0.05
a_f      |    0.04   -0.03 |    0.67

ADVA:  (20018,) (35146,) 0.5695669492972173
ADV1:  0.0011260891235409434 2.74371715389815e-05 0.014158187167228739 0.08080965155052999 -0.11384356639811624
ADVB:  (20159,) (35146,) 0.5735787856370569
ADV2:  0.1559761030229589 0.3775147255894985 0.5014283077359453 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2509   0.0844   0.4160  44.3057  15.5506  10.7326
***** Episode 18009, Mean R = -23.6  Std R = 7.4  Min R = -41.8
PolicyLoss: 2.82
Policy_Entropy: 0.114
Policy_KL: 0.00549
Policy_SD: 0.619
Steps: 1.16e+04
TotalSteps: 6.2e+06
VF_0_ExplainedVarNew: 0.918
VF_0_ExplainedVarOld: 0.913
VF_0_Loss : 0.000552


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0040   9.0125   2.2441   3.0464
ADVA:  (19974,) (34905,) 0.5722389342501074
ADV1:  0.0015218331444434158 0.00028690914989960555 0.014662323687078009 0.11902387406971404 -0.11384356639811624
ADVB:  (20495,) (34905,) 0.587165162584157
ADV2:  0.18370603873775088 0.39177958427735227 0.5133117230155176 3

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0036   0.0022   0.0086   9.0125   2.2441   3.0464
ADVA:  (18554,) (34913,) 0.5314352819866526
ADV1:  0.0025468197117771117 0.0011037642196582607 0.015501337244137573 0.10697568293732995 -0.09381612322326952
ADVB:  (21771,) (34913,) 0.6235786096869361
ADV2:  0.3082445804947911 0.4780263258572394 0.5662936090404246 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7392   0.2742   1.2600  44.3057  15.5506  10.7326
***** Episode 18257, Mean R = -27.9  Std R = 10.1  Min R = -49.8
PolicyLoss: 3.27
Policy_Entropy: 0.117
Policy_KL: 0.00376
Policy_SD: 0.617
Steps: 1.17e+04
TotalSteps: 6.3e+06
VF_0_ExplainedVarNew: 0.921
VF_0_ExplainedVarOld: 0.916
VF_0_Loss : 0.00089


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0062   0.0035   0.0135   9.0125   2.2441   3.0464
ADVA:  (18840,) (35044,) 0.5376098618879124
ADV1:  0.003820350047568695

***** Episode 18474, Mean R = -22.7  Std R = 8.8  Min R = -38.6
PolicyLoss: 2.24
Policy_Entropy: 0.12
Policy_KL: 0.00562
Policy_SD: 0.609
Steps: 1.17e+04
TotalSteps: 6.38e+06
VF_0_ExplainedVarNew: 0.923
VF_0_ExplainedVarOld: 0.911
VF_0_Loss : 0.00117


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0049   0.0030   0.0119   9.0125   2.2441   3.0464
ADVA:  (20692,) (34975,) 0.5916225875625447
ADV1:  0.0012997169049735052 0.0005240138079546426 0.026899742423404403 0.27193475958049695 -1.0886906834120893
ADVB:  (19310,) (34975,) 0.552108649035025
ADV2:  0.08089258963055986 0.29701966744806185 0.4389851605370398 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1412   0.0432   0.2477  44.3057  15.5506  10.7326
***** Episode 18505, Mean R = -22.1  Std R = 8.7  Min R = -48.4
PolicyLoss: 2.26
Policy_Entropy: 0.12
Policy_KL: 0.00553
Policy_SD: 0.607
Steps: 1.17e+04
TotalSteps: 6.39e+06
VF_0_ExplainedVarNew: 0.897
VF_0_ExplainedVarOld: 0.889
VF_0_Loss : 0.00111


Dynamics: Max D

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7053   0.2299   1.2365  44.3057  15.5506  10.7326
***** Episode 18722, Mean R = -20.4  Std R = 8.7  Min R = -51.9
PolicyLoss: 2.77
Policy_Entropy: 0.123
Policy_KL: 0.00683
Policy_SD: 0.595
Steps: 1.17e+04
TotalSteps: 6.47e+06
VF_0_ExplainedVarNew: 0.928
VF_0_ExplainedVarOld: 0.924
VF_0_Loss : 0.000779


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0046   0.0028   0.0104   9.0125   2.2441   3.0464
ADVA:  (19926,) (34948,) 0.5701613826256152
ADV1:  0.0004310732408135433 -0.0009682040749311811 0.014545499437688063 0.12185636185011803 -0.13122606208422158
ADVB:  (19662,) (34948,) 0.5626073022776696
ADV2:  0.12046016386148972 0.3494538572128058 0.4802976148705788 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5744   0.3001   1.3337  44.3057  15.5506  10.7326
***** Episode 18753, Mean R = -24.2  Std R = 9.3  Min R = -45.8
PolicyLoss: 2.59
Policy_Entropy: 0.123
Policy_KL: 0.00663
Policy_SD: 0.6
Steps: 1.16e+04
TotalSte

***** Episode 18970, Mean R = -21.1  Std R = 9.5  Min R = -58.0
PolicyLoss: 2.86
Policy_Entropy: 0.124
Policy_KL: 0.00594
Policy_SD: 0.593
Steps: 1.15e+04
TotalSteps: 6.56e+06
VF_0_ExplainedVarNew: 0.909
VF_0_ExplainedVarOld: 0.904
VF_0_Loss : 0.000833


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0026   9.0125   2.2441   3.0464
ADVA:  (19591,) (34874,) 0.5617652119057177
ADV1:  0.0016291022513155624 0.00023892534572265455 0.014304967350081743 0.10072045495037124 -0.1129137855959904
ADVB:  (20565,) (34874,) 0.5896943281527786
ADV2:  0.19458660824462287 0.3890550684048384 0.5105872453257959 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3631   0.1264   0.6568  44.3057  15.5506  10.7326
***** Episode 19001, Mean R = -21.5  Std R = 9.7  Min R = -54.9
PolicyLoss: 2.73
Policy_Entropy: 0.124
Policy_KL: 0.0066
Policy_SD: 0.594
Steps: 1.18e+04
TotalSteps: 6.58e+06
VF_0_ExplainedVarNew: 0.922
VF_0_ExplainedVarOld: 0.916
VF_0_Loss : 0.000509


ValFun  Gra

attitude |    0.05   -0.05   -0.12 |    1.09    0.62    1.82 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.04   -0.08 |    0.63    1.84 |   -1.47   -3.14 |    1.46    3.10
w_f      |    0.00    0.00   -0.00 |    0.02    0.01    0.01 |   -0.04   -0.03   -0.03 |    0.05    0.03    0.03
w_rewards |   -0.00 |    0.00 |   -0.03 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |   -0.16 |    2.84 |  -50.00 |    0.00
theta_cv |    0.32 |    0.26 |    0.00 |    1.63
seeker_angles |    0.00   -0.00 |    0.09    0.09 |   -0.99   -1.00 |    0.99    1.00
cs_angles |  0.0006 -0.0001 |  0.0930  0.0949 | -0.9862 -0.9977 |  0.9941  0.9976
optical_flow | -0.0001  0.0000 |  0.0272  0.0274 | -1.1119 -1.0405 |  1.3208  1.3479
v_err    | -0.0106 |  0.0595 | -0.4999 |  0.4908
landing_rewards |    8.19 |    3.85 |    0.00 |   10.00
landing_margin |    1

ADVA:  (21260,) (34949,) 0.608314973246731
ADV1:  0.0007448783953995022 -0.001118559533053081 0.01335123115468914 0.07019635126301085 -0.10359020353146224
ADVB:  (18752,) (34949,) 0.5365532633265616
ADV2:  0.07089840984026655 0.29373871703971494 0.41748414757621494 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3464   0.1109   0.5939  44.3057  15.5506  10.7326
Update Cnt = 630    ET =   1287.5   Stats:  Mean, Std, Min, Max
r_f      |   -5.48  -10.28   -1.21 |  192.30  159.24  202.10 | -386.32 -390.87 -380.04 |  387.37  365.39  390.75
v_f      |   -0.00    0.00    0.00 |    0.05    0.05    0.05 |   -0.15   -0.13   -0.10 |    0.11    0.10    0.11
r_i      |  -28.74  -56.11    7.06 |  685.32  626.24  781.22 |-1324.62-1373.44-1339.46 | 1285.92 1270.46 1352.42
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.08   -0.10 |    0.09    0.10    0.09
norm_rf  |    0.45 |    0.19 |    0.06 |    1.36
norm_vf  |    0.08 |    0.02 |    0.04 |    0.16
gs_f     

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0013   0.0045   9.0125   2.2441   3.0464
ADVA:  (21168,) (35081,) 0.6034035517801659
ADV1:  0.0004251725028199503 -0.000973389254465804 0.013762916792883684 0.0744223479421326 -0.09320656942846582
ADVB:  (19191,) (35081,) 0.5470482597417405
ADV2:  0.09380109567837168 0.3419592159598829 0.4711306763360786 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5095   0.1895   0.8110  44.3057  15.5506  10.7326
***** Episode 19838, Mean R = -18.7  Std R = 7.0  Min R = -36.0
PolicyLoss: 2.51
Policy_Entropy: 0.133
Policy_KL: 0.00496
Policy_SD: 0.575
Steps: 1.16e+04
TotalSteps: 6.89e+06
VF_0_ExplainedVarNew: 0.938
VF_0_ExplainedVarOld: 0.935
VF_0_Loss : 0.000553


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0025   9.0125   2.2441   3.0464
ADVA:  (21536,) (35059,) 0.6142787871873128
ADV1:  0.00210682682661335

***** Episode 20055, Mean R = -21.4  Std R = 9.3  Min R = -42.2
PolicyLoss: 2.72
Policy_Entropy: 0.135
Policy_KL: 0.00557
Policy_SD: 0.584
Steps: 1.17e+04
TotalSteps: 6.97e+06
VF_0_ExplainedVarNew: 0.948
VF_0_ExplainedVarOld: 0.943
VF_0_Loss : 0.000606


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0019   0.0069   9.0125   2.2441   3.0464
ADVA:  (20803,) (34965,) 0.5949663949663949
ADV1:  0.0017376870332207472 0.0006529684919719271 0.01296437158950503 0.08431890030342283 -0.11392385090210977
ADVB:  (21054,) (34965,) 0.6021450021450021
ADV2:  0.18282744851813346 0.40118192977334305 0.522735130697688 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5142   0.2190   0.9358  44.3057  15.5506  10.7326
***** Episode 20086, Mean R = -20.0  Std R = 6.6  Min R = -35.9
PolicyLoss: 2.67
Policy_Entropy: 0.134
Policy_KL: 0.00606
Policy_SD: 0.591
Steps: 1.15e+04
TotalSteps: 6.98e+06
VF_0_ExplainedVarNew: 0.957
VF_0_ExplainedVarOld: 0.951
VF_0_Loss : 0.000826


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3685   0.1473   0.6843  44.3057  15.5506  10.7326
***** Episode 20303, Mean R = -16.0  Std R = 6.7  Min R = -29.7
PolicyLoss: 2.25
Policy_Entropy: 0.138
Policy_KL: 0.00836
Policy_SD: 0.568
Steps: 1.17e+04
TotalSteps: 7.06e+06
VF_0_ExplainedVarNew: 0.931
VF_0_ExplainedVarOld: 0.929
VF_0_Loss : 0.000335


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0015   9.0125   2.2441   3.0464
ADVA:  (22273,) (35075,) 0.6350106913756237
ADV1:  0.0002971900409555108 -0.0005674216482208834 0.012090049016881621 0.08851586358432972 -0.1101844381667022
ADVB:  (17802,) (35075,) 0.5075409836065574
ADV2:  0.013964874106570878 0.30070906963831523 0.44256428930845176 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4174   0.1465   0.6996  44.3057  15.5506  10.7326
***** Episode 20334, Mean R = -18.1  Std R = 7.6  Min R = -36.4
PolicyLoss: 2.35
Policy_Entropy: 0.138
Policy_KL: 0.00718
Policy_SD: 0.567
Steps: 1.17e+04
Tota

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0783   0.2053   1.3459  44.3057  15.5506  10.7326
***** Episode 20551, Mean R = -16.5  Std R = 6.7  Min R = -39.3
PolicyLoss: 3.11
Policy_Entropy: 0.139
Policy_KL: 0.00705
Policy_SD: 0.571
Steps: 1.17e+04
TotalSteps: 7.16e+06
VF_0_ExplainedVarNew: 0.93
VF_0_ExplainedVarOld: 0.925
VF_0_Loss : 0.00014


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0021   9.0125   2.2441   3.0464
ADVA:  (20065,) (35063,) 0.5722556541083194
ADV1:  0.000973482285365538 0.00010121177352941907 0.01252126410090306 0.11280129240134656 -0.1449126104077506
ADVB:  (21302,) (35063,) 0.6075350084134272
ADV2:  0.21329639604050019 0.4093435777922756 0.5159914129841928 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8108   0.2871   1.4841  44.3057  15.5506  10.7326
***** Episode 20582, Mean R = -17.9  Std R = 6.1  Min R = -31.1
PolicyLoss: 2.65
Policy_Entropy: 0.14
Policy_KL: 0.00518
Policy_SD: 0.566
Steps: 1.18e+04
TotalSteps: 

seeker_angles |    0.00   -0.00 |    0.09    0.09 |   -0.99   -1.00 |    1.00    0.99
cs_angles |  0.0017 -0.0003 |  0.0884  0.0888 | -0.9887 -0.9965 |  0.9967  0.9940
optical_flow |  0.0000  0.0001 |  0.0261  0.0264 | -1.2020 -1.0640 |  1.0433  1.1126
v_err    | -0.0104 |  0.0590 | -0.4527 |  0.1145
landing_rewards |    8.90 |    3.12 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.06 |    0.04
tracking_rewards |  -22.55 |    5.49 |  -43.02 |  -12.40
steps    |     380 |      20 |     335 |     417
***** Episode 20830, Mean R = -18.9  Std R = 6.9  Min R = -37.0
PolicyLoss: 2.67
Policy_Entropy: 0.142
Policy_KL: 0.00686
Policy_SD: 0.568
Steps: 1.17e+04
TotalSteps: 7.26e+06
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.000105


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0065   0.0039   0.0148   9.0125   2.2441   3.0464
ADVA:  (19911,) (35173,) 0.5660876240297956
ADV1:  1.3461451536089103e-05 -0.0009507148565591002 0.012502918044627467 0.076

attitude |   -0.07    0.01   -0.10 |    1.16    0.67    1.90 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00    0.00   -0.00 |    0.01    0.01    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02   -0.10 |    0.67    1.92 |   -1.47   -3.14 |    1.50    3.12
w_f      |    0.00    0.00   -0.00 |    0.02    0.02    0.01 |   -0.04   -0.04   -0.03 |    0.05    0.04    0.03
w_rewards |   -0.00 |    0.00 |   -0.01 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.29 |    0.26 |    0.00 |    1.37
seeker_angles |    0.00   -0.00 |    0.09    0.09 |   -0.99   -0.97 |    0.97    1.00
cs_angles |  0.0012 -0.0011 |  0.0872  0.0905 | -0.9921 -0.9732 |  0.9672  0.9952
optical_flow | -0.0002  0.0002 |  0.0250  0.0262 | -1.1107 -1.2145 |  1.5358  1.2936
v_err    | -0.0104 |  0.0587 | -0.4522 |  0.1329
landing_rewards |    8.87 |    3.16 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (19559,) (35549,) 0.5501983178148471
ADV1:  0.0020274324282346483 0.0013536476388276942 0.010094696412794162 0.0739902942379316 -0.09371026378071506
ADVB:  (23198,) (35549,) 0.6525640664997608
ADV2:  0.2851689439351538 0.45091964653535904 0.5392888422160865 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5443   0.2087   0.9868  44.3057  15.5506  10.7326
Update Cnt = 690    ET =   1411.9   Stats:  Mean, Std, Min, Max
r_f      |   -2.86  -11.91   -5.21 |  181.01  180.91  201.77 | -380.79 -356.03 -374.80 |  394.54  369.36  386.27
v_f      |   -0.00   -0.00    0.01 |    0.04    0.05    0.05 |   -0.12   -0.10   -0.12 |    0.09    0.11    0.10
r_i      |   -1.98  -43.14  -35.38 |  657.62  703.29  756.77 |-1304.86-1296.54-1320.26 | 1284.00 1319.81 1345.77
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.08   -0.09   -0.09 |    0.10    0.09    0.10
norm_rf  |    0.41 |    0.17 |    0.05 |    0.94
norm_vf  |    0.08 |    0.02 |    0.03 |    0.13
gs_f     |

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0016   9.0125   2.2441   3.0464
ADVA:  (20827,) (35095,) 0.5934463598803248
ADV1:  0.0013464973520362807 0.0004217439636866378 0.011299929417400748 0.09912765355679869 -0.0875981236182396
ADVB:  (20360,) (35095,) 0.5801396210286366
ADV2:  0.1368384857901595 0.35612954242585076 0.5006887251342259 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1919   0.0674   0.3364  44.3057  15.5506  10.7326
***** Episode 21698, Mean R = -18.2  Std R = 6.8  Min R = -36.0
PolicyLoss: 2.35
Policy_Entropy: 0.147
Policy_KL: 0.00583
Policy_SD: 0.565
Steps: 1.16e+04
TotalSteps: 7.59e+06
VF_0_ExplainedVarNew: 0.96
VF_0_ExplainedVarOld: 0.956
VF_0_Loss : 0.0001


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0016   0.0059   9.0125   2.2441   3.0464
ADVA:  (20663,) (35085,) 0.5889411429385777
ADV1:  0.0006532463341845825 

***** Episode 21915, Mean R = -18.9  Std R = 14.6  Min R = -84.3
PolicyLoss: 2.37
Policy_Entropy: 0.149
Policy_KL: 0.00457
Policy_SD: 0.571
Steps: 1.17e+04
TotalSteps: 7.67e+06
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.959
VF_0_Loss : 0.00018


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0052   0.0030   0.0105   9.0125   2.2441   3.0464
ADVA:  (18773,) (35393,) 0.5304156189076936
ADV1:  0.0013826457642834925 0.0005922326833937466 0.011504791023180591 0.09820272825247105 -0.09287424851958215
ADVB:  (22032,) (35393,) 0.622495973780126
ADV2:  0.23819958338871136 0.43560655993258446 0.5524970466456407 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4958   0.2100   0.9964  44.3057  15.5506  10.7326
***** Episode 21946, Mean R = -16.6  Std R = 6.1  Min R = -31.3
PolicyLoss: 2.68
Policy_Entropy: 0.148
Policy_KL: 0.00444
Policy_SD: 0.571
Steps: 1.18e+04
TotalSteps: 7.69e+06
VF_0_ExplainedVarNew: 0.962
VF_0_ExplainedVarOld: 0.955
VF_0_Loss : 0.00014


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3393   0.1212   0.5940  44.3057  15.5506  10.7326
***** Episode 22163, Mean R = -18.1  Std R = 6.7  Min R = -33.1
PolicyLoss: 2.07
Policy_Entropy: 0.151
Policy_KL: 0.00458
Policy_SD: 0.567
Steps: 1.16e+04
TotalSteps: 7.77e+06
VF_0_ExplainedVarNew: 0.937
VF_0_ExplainedVarOld: 0.933
VF_0_Loss : 0.000155


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0021   9.0125   2.2441   3.0464
ADVA:  (20403,) (34811,) 0.5861078394760277
ADV1:  0.0008690776779828671 -0.0003995109437766767 0.011555441431503978 0.07027999550346492 -0.09983493201678671
ADVB:  (20429,) (34811,) 0.5868547298267789
ADV2:  0.15655590617931417 0.3473863901843545 0.46459761489403517 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5753   0.2873   1.2022  44.3057  15.5506  10.7326
***** Episode 22194, Mean R = -18.3  Std R = 6.4  Min R = -31.1
PolicyLoss: 2.25
Policy_Entropy: 0.15
Policy_KL: 0.00606
Policy_SD: 0.571
Steps: 1.16e+04
TotalS

ADVA:  (18963,) (35249,) 0.5379727084456296
ADV1:  0.0 -0.0007312451022467607 0.009316853094099287 0.06585043768088028 -0.09986597390163654
ADVB:  (17926,) (35249,) 0.5085534341399756
ADV2:  0.016002536546010058 0.30950843616598955 0.49339207220328923 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8248   0.3378   1.5832  44.3057  15.5506  10.7326
***** Episode 22411, Mean R = -16.3  Std R = 9.1  Min R = -53.1
PolicyLoss: 2.29
Policy_Entropy: 0.151
Policy_KL: 0.00494
Policy_SD: 0.561
Steps: 1.18e+04
TotalSteps: 7.86e+06
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.000493


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   9.0125   2.2441   3.0464
ADVA:  (20034,) (35295,) 0.5676158096047599
ADV1:  0.0 -0.0006310489079083772 0.009770261294482914 0.06585043768088028 -0.08560422857965394
ADVB:  (18164,) (35295,) 0.5146338008216461
ADV2:  0.02528692461167034 0.3028190170477442 0.4797435494420081 3.0 0.0
Policy  Gradients: u/sd

attitude |   -0.09    0.06    0.06 |    1.15    0.61    1.84 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00    0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.06    0.05    0.05
a_f      |    0.06    0.05 |    0.62    1.84 |   -1.42   -3.13 |    1.55    3.11
w_f      |    0.01    0.00    0.00 |    0.02    0.01    0.01 |   -0.03   -0.03   -0.04 |    0.06    0.03    0.03
w_rewards |   -0.00 |    0.01 |   -0.15 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.28 |    0.26 |    0.00 |    1.63
seeker_angles |    0.00   -0.00 |    0.09    0.08 |   -1.00   -0.99 |    0.99    0.99
cs_angles |  0.0037 -0.0013 |  0.0912  0.0841 | -0.9950 -0.9929 |  0.9876  0.9938
optical_flow |  0.0000  0.0001 |  0.0266  0.0236 | -1.0988 -0.8608 |  1.5336  0.9519
v_err    | -0.0107 |  0.0598 | -0.4528 |  0.1187
landing_rewards |    9.16 |    2.77 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4879   0.1993   0.8371  44.3057  15.5506  10.7326
Update Cnt = 740    ET =   1439.5   Stats:  Mean, Std, Min, Max
r_f      |    3.71   -7.51   -9.50 |  188.68  168.08  191.57 | -382.68 -382.08 -392.24 |  392.12  358.45  398.26
v_f      |   -0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.13   -0.10   -0.12 |    0.09    0.12    0.14
r_i      |    6.06  -11.91  -46.31 |  682.12  660.50  756.21 |-1251.07-1367.14-1311.68 | 1364.07 1227.73 1301.99
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.34 |    0.15 |    0.01 |    1.01
norm_vf  |    0.08 |    0.02 |    0.02 |    0.14
gs_f     |    1.28 |    1.79 |    0.01 |   17.56
thrust   |   -0.00    0.00    0.00 |    0.67    0.69    0.69 |   -3.28   -3.35   -3.38 |    3.46    3.23    3.36
norm_thrust |    0.92 |    0.74 |    0.00 |    3.46
fuel     |    1.65 |    0.23 |    1.17 |    2.84
rewards  |  -16.02 |

ADVA:  (20151,) (35288,) 0.5710439809566992
ADV1:  0.0 -0.0003598145768783009 0.008771156453314539 0.06290814345489931 -0.0802251606471785
ADVB:  (18534,) (35288,) 0.5252210383133077
ADV2:  0.05217259406478629 0.33032895954146896 0.4950761180803736 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6014   0.3168   1.2689  44.3057  15.5506  10.7326
***** Episode 23248, Mean R = -15.3  Std R = 6.3  Min R = -33.2
PolicyLoss: 2.33
Policy_Entropy: 0.156
Policy_KL: 0.00581
Policy_SD: 0.56
Steps: 1.19e+04
TotalSteps: 8.18e+06
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.000481


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0048   9.0125   2.2441   3.0464
ADVA:  (19881,) (35406,) 0.561514997458058
ADV1:  0.0 -0.0016449691855827439 0.00965993951388711 0.06290814345489931 -0.09051883619335854
ADVB:  (17097,) (35406,) 0.4828842569056092
ADV2:  0.0 0.2698527252374647 0.44227022466955107 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   9.0125   2.2441   3.0464
ADVA:  (19525,) (35358,) 0.5522088353413654
ADV1:  0.0012256665730798824 0.00024022910857812493 0.010224451870055695 0.07283364329094 -0.10574138878039635
ADVB:  (22038,) (35358,) 0.623281859833701
ADV2:  0.21859738436468476 0.4015241906031656 0.5006689723383558 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7901   0.3973   1.6092  44.3057  15.5506  10.7326
***** Episode 23496, Mean R = -15.7  Std R = 5.7  Min R = -28.9
PolicyLoss: 2.39
Policy_Entropy: 0.158
Policy_KL: 0.00602
Policy_SD: 0.562
Steps: 1.18e+04
TotalSteps: 8.27e+06
VF_0_ExplainedVarNew: 0.958
VF_0_ExplainedVarOld: 0.954
VF_0_Loss : 0.00244


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0004   9.0125   2.2441   3.0464
ADVA:  (20418,) (35204,) 0.5799909101238495
ADV1:  0.0004241150618123496 -0.0008003655241446608 0.010731487088033743 0.07283364329094 -0.10574138878039635
ADVB:  (201

***** Episode 23713, Mean R = -15.2  Std R = 6.3  Min R = -31.0
PolicyLoss: 2.6
Policy_Entropy: 0.156
Policy_KL: 0.034
Policy_SD: 0.567
Steps: 1.18e+04
TotalSteps: 8.35e+06
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.000435


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0036   0.0014   0.0073   9.0125   2.2441   3.0464
ADVA:  (20221,) (35207,) 0.5743460107365013
ADV1:  0.00047071201612524337 -0.00011085612551123419 0.009473786414002686 0.05127950314696095 -0.096457611182722
ADVB:  (19490,) (35207,) 0.5535830942710257
ADV2:  0.09683577060072586 0.3489737729688293 0.4951411793408875 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.4834   3.5852  15.1799  44.3057  15.5506  10.7326
***** Episode 23744, Mean R = -17.1  Std R = 13.6  Min R = -84.1
PolicyLoss: 2.33
Policy_Entropy: 0.157
Policy_KL: 0.00567
Policy_SD: 0.564
Steps: 1.16e+04
TotalSteps: 8.36e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.00112


ValFun  Gradie

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6285   0.2914   1.1903  44.3057  15.5506  10.7326
***** Episode 23961, Mean R = -16.7  Std R = 7.4  Min R = -40.6
PolicyLoss: 2.41
Policy_Entropy: 0.158
Policy_KL: 0.00462
Policy_SD: 0.566
Steps: 1.18e+04
TotalSteps: 8.45e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.00123


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0050   0.0030   0.0101   9.0125   2.2441   3.0464
ADVA:  (20975,) (35178,) 0.5962533401557791
ADV1:  0.00014675716585022328 -0.0008959554676770131 0.011027423960037093 0.0822800581895109 -0.0802385031752234
ADVB:  (18787,) (35178,) 0.5340553755187901
ADV2:  0.06579136676745097 0.3123161631618169 0.4541213701346696 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2126   0.0703   0.3517  44.3057  15.5506  10.7326
***** Episode 23992, Mean R = -15.7  Std R = 5.4  Min R = -28.6
PolicyLoss: 2.16
Policy_Entropy: 0.158
Policy_KL: 0.00419
Policy_SD: 0.568
Steps: 1.16e+04
TotalSte

w        |    0.00    0.00   -0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.03   -0.02 |    0.65    1.91 |   -1.46   -3.13 |    1.52    3.13
w_f      |    0.00   -0.00    0.00 |    0.02    0.01    0.01 |   -0.04   -0.03   -0.03 |    0.05    0.03    0.05
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.26 |    0.26 |    0.00 |    1.56
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -1.00   -0.98 |    1.00    0.99
cs_angles |  0.0010  0.0015 |  0.0835  0.0835 | -0.9957 -0.9821 |  0.9983  0.9945
optical_flow |  0.0001  0.0001 |  0.0272  0.0243 | -1.1414 -0.9605 |  1.0472  1.0360
v_err    | -0.0113 |  0.0593 | -0.4519 |  0.1213
landing_rewards |    8.90 |    3.12 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.06 |    0.05
tracking_rewards |  -19.54 |    5.22 |  -43.85 |  -10.71
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3113   0.1251   0.5719  44.3057  15.5506  10.7326
Update Cnt = 790    ET =   1469.9   Stats:  Mean, Std, Min, Max
r_f      |    6.84    8.22  -15.09 |  186.13  160.90  210.23 | -383.40 -397.68 -397.24 |  385.91  394.19  390.01
v_f      |   -0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.13   -0.10   -0.11 |    0.12    0.09    0.11
r_i      |   -0.57  -12.79  -78.22 |  700.12  641.11  761.24 |-1299.64-1312.41-1347.07 | 1386.45 1264.82 1268.03
v_i      |    0.00   -0.00    0.01 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.08    0.09
norm_rf  |    0.32 |    0.13 |    0.04 |    0.84
norm_vf  |    0.08 |    0.02 |    0.04 |    0.14
gs_f     |    1.22 |    1.81 |    0.01 |   13.92
thrust   |    0.00    0.00    0.00 |    0.69    0.71    0.68 |   -3.45   -3.40   -3.43 |    3.44    3.41    3.44
norm_thrust |    0.93 |    0.75 |    0.00 |    3.46
fuel     |    1.63 |    0.23 |    1.19 |    2.67
rewards  |  -15.43 |

ADVA:  (21435,) (35073,) 0.6111538790522624
ADV1:  0.0012187099989566866 -0.0002618548499942767 0.011569414380589933 0.07227060307643451 -0.07878048074654133
ADVB:  (20759,) (35073,) 0.591879793573404
ADV2:  0.17491794959996473 0.36572063709993075 0.46633680804781025 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2088   0.5840   2.3319  44.3057  15.5506  10.7326
***** Episode 24798, Mean R = -15.6  Std R = 4.8  Min R = -28.1
PolicyLoss: 2.26
Policy_Entropy: 0.162
Policy_KL: 0.00484
Policy_SD: 0.55
Steps: 1.16e+04
TotalSteps: 8.76e+06
VF_0_ExplainedVarNew: 0.958
VF_0_ExplainedVarOld: 0.954
VF_0_Loss : 0.000602


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0009   0.0048   9.0125   2.2441   3.0464
ADVA:  (22459,) (35181,) 0.6383843551917228
ADV1:  0.0013294963209932766 -0.00022123402715951406 0.01214922653993 0.07227060307643451 -0.08986583890729133
ADVB:  (20458,) (35181,) 0.5815070634717603
ADV2:  0.14783196148848177 0.3493421893432637 0.4525750321181295 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0062   0.0030   0.0114   9.0125   2.2441   3.0464
ADVA:  (21461,) (34944,) 0.614154075091575
ADV1:  0.0017514334581993493 0.0009267370225611121 0.010256646353957823 0.06909884907295155 -0.0974074313180589
ADVB:  (22024,) (34944,) 0.6302655677655677
ADV2:  0.22258139469261967 0.41216476590427076 0.5167289653804283 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9618   0.5185   2.1321  44.3057  15.5506  10.7326
***** Episode 25046, Mean R = -14.6  Std R = 5.6  Min R = -28.5
PolicyLoss: 2.37
Policy_Entropy: 0.164
Policy_KL: 0.00492
Policy_SD: 0.562
Steps: 1.15e+04
TotalSteps: 8.86e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.00196


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0018   0.0059   9.0125   2.2441   3.0464
ADVA:  (21332,) (34992,) 0.6096250571559213
ADV1:  0.0009273070938166476 7.817154444514329e-05 0.010302749023396871 0.06909884907295155 -0.0974074313180589
ADVB:  (

***** Episode 25263, Mean R = -12.8  Std R = 4.5  Min R = -26.6
PolicyLoss: 1.99
Policy_Entropy: 0.165
Policy_KL: 0.00609
Policy_SD: 0.545
Steps: 1.16e+04
TotalSteps: 8.94e+06
VF_0_ExplainedVarNew: 0.948
VF_0_ExplainedVarOld: 0.945
VF_0_Loss : 0.000882


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0007   0.0032   9.0125   2.2441   3.0464
ADVA:  (20402,) (35000,) 0.5829142857142857
ADV1:  0.00040063434525684435 -0.0004298182157604548 0.00916041676419 0.06339647799730305 -0.0906250010930782
ADVB:  (19958,) (35000,) 0.5702285714285714
ADV2:  0.12386352570250528 0.33413995972985366 0.4524550454796717 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4973   0.2572   1.0956  44.3057  15.5506  10.7326
***** Episode 25294, Mean R = -15.3  Std R = 6.5  Min R = -32.4
PolicyLoss: 2.1
Policy_Entropy: 0.165
Policy_KL: 0.00425
Policy_SD: 0.556
Steps: 1.17e+04
TotalSteps: 8.95e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.00159


ValFun  Gradie

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4169   0.1770   0.8206  44.3057  15.5506  10.7326
***** Episode 25511, Mean R = -11.8  Std R = 4.7  Min R = -27.2
PolicyLoss: 2.1
Policy_Entropy: 0.165
Policy_KL: 0.00587
Policy_SD: 0.557
Steps: 1.18e+04
TotalSteps: 9.03e+06
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.000694


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0001   0.0007   9.0125   2.2441   3.0464
ADVA:  (20292,) (35043,) 0.5790600119852752
ADV1:  0.0013199561344052037 0.0006117882062937383 0.009411722390043023 0.08989559823965154 -0.07684985681790459
ADVB:  (21765,) (35043,) 0.6210940844105813
ADV2:  0.2060886396321738 0.39221203009756445 0.5092549466769032 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8899   0.4581   1.8645  44.3057  15.5506  10.7326
***** Episode 25542, Mean R = -15.9  Std R = 7.7  Min R = -39.7
PolicyLoss: 2.29
Policy_Entropy: 0.164
Policy_KL: 0.00496
Policy_SD: 0.567
Steps: 1.15e+04
TotalSte

seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.98 |    1.00    0.98
cs_angles |  0.0027  0.0012 |  0.0778  0.0838 | -0.9775 -0.9807 |  0.9978  0.9810
optical_flow | -0.0001  0.0001 |  0.0257  0.0228 | -1.0246 -1.0518 |  1.0537  1.1403
v_err    | -0.0108 |  0.0597 | -0.4523 |  0.1132
landing_rewards |    9.35 |    2.46 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -18.58 |    4.72 |  -35.71 |  -10.30
steps    |     377 |      20 |     334 |     416
***** Episode 25790, Mean R = -14.3  Std R = 5.7  Min R = -30.4
PolicyLoss: 2.14
Policy_Entropy: 0.167
Policy_KL: 0.00633
Policy_SD: 0.56
Steps: 1.18e+04
TotalSteps: 9.14e+06
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.00045


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0005   0.0017   9.0125   2.2441   3.0464
ADVA:  (19509,) (35166,) 0.5547688107831428
ADV1:  2.4867720931708026e-05 -0.0001031295840394523 0.008390261961665207 0.060329

attitude |   -0.08   -0.06    0.06 |    1.19    0.63    1.85 |   -3.14   -1.51   -3.14 |    3.14    1.53    3.14
w        |    0.00    0.00   -0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.06    0.08 |    0.63    1.84 |   -1.48   -3.08 |    1.48    3.14
w_f      |    0.00    0.00   -0.00 |    0.02    0.01    0.01 |   -0.04   -0.02   -0.04 |    0.05    0.03    0.05
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.26 |    0.26 |    0.00 |    1.36
seeker_angles |    0.00   -0.00 |    0.08    0.09 |   -1.00   -0.97 |    1.00    0.97
cs_angles |  0.0002 -0.0013 |  0.0836  0.0853 | -0.9953 -0.9674 |  0.9996  0.9717
optical_flow | -0.0001 -0.0000 |  0.0260  0.0235 | -1.1665 -1.0908 |  1.3455  1.3558
v_err    | -0.0105 |  0.0592 | -0.4524 |  0.1220
landing_rewards |    9.19 |    2.72 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1622   0.6538   2.3731  44.3057  15.5506  10.7326
Update Cnt = 850    ET =   1485.1   Stats:  Mean, Std, Min, Max
r_f      |   -2.37  -18.11  -11.49 |  193.11  166.41  195.25 | -379.13 -370.50 -387.18 |  384.72  356.12  388.25
v_f      |   -0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.12   -0.11   -0.11 |    0.13    0.13    0.12
r_i      |    5.17  -66.06  -27.88 |  708.58  634.88  761.14 |-1316.03-1310.50-1286.97 | 1381.74 1223.83 1273.59
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.08   -0.10 |    0.10    0.09    0.10
norm_rf  |    0.26 |    0.12 |    0.02 |    1.12
norm_vf  |    0.08 |    0.02 |    0.05 |    0.17
gs_f     |    1.20 |    1.51 |    0.01 |   11.34
thrust   |   -0.00   -0.00   -0.00 |    0.68    0.68    0.69 |   -3.42   -3.41   -3.29 |    3.43    3.36    3.42
norm_thrust |    0.92 |    0.74 |    0.00 |    3.46
fuel     |    1.59 |    0.20 |    1.15 |    2.49
rewards  |  -14.37 |

ADVA:  (19182,) (35514,) 0.5401250211184322
ADV1:  0.0007305268588675487 0.0004061387483787517 0.00838637476099236 0.05297959917321815 -0.0923137171069781
ADVB:  (21140,) (35514,) 0.5952582080306358
ADV2:  0.17316104126435602 0.40867355639828035 0.5517899039980982 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8569   0.2685   1.4252  44.3057  15.5506  10.7326
***** Episode 26658, Mean R = -12.4  Std R = 3.9  Min R = -21.6
PolicyLoss: 2.43
Policy_Entropy: 0.17
Policy_KL: 0.00527
Policy_SD: 0.547
Steps: 1.18e+04
TotalSteps: 9.47e+06
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.000282


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0016   0.0059   9.0125   2.2441   3.0464
ADVA:  (19252,) (35339,) 0.5447805540620844
ADV1:  0.0006031475612655827 0.00011660609663802107 0.008542192075245144 0.05297959917321815 -0.0923137171069781
ADVB:  (20756,) (35339,) 0.587339766263901
ADV2:  0.1562112148946316 0.3799680995354163 0.5153468896837368 3.0

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0043   0.0025   0.0085   9.0125   2.2441   3.0464
ADVA:  (18119,) (35148,) 0.5155058609309207
ADV1:  0.0007538650744964692 0.0005796474939405824 0.007726665387790589 0.059484690218008174 -0.062133911755902105
ADVB:  (21861,) (35148,) 0.6219699556162512
ADV2:  0.2379713914168936 0.45511326489068166 0.5702644503580219 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.9157   1.5282   6.0249  44.3057  15.5506  10.7326
***** Episode 26906, Mean R = -12.0  Std R = 5.5  Min R = -30.2
PolicyLoss: 2.6
Policy_Entropy: 0.171
Policy_KL: 0.00725
Policy_SD: 0.555
Steps: 1.16e+04
TotalSteps: 9.56e+06
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.000108


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0022   0.0077   9.0125   2.2441   3.0464
ADVA:  (19563,) (35020,) 0.5586236436322102
ADV1:  8.421697506352452e-05 -0.0003292341183493358 0.008465936267750088 0.055961674187556765 -0.0846560703045417
ADV

***** Episode 27123, Mean R = -14.0  Std R = 4.8  Min R = -23.7
PolicyLoss: 2.01
Policy_Entropy: 0.173
Policy_KL: 0.00483
Policy_SD: 0.554
Steps: 1.18e+04
TotalSteps: 9.64e+06
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.000136


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0069   0.0038   0.0121   9.0125   2.2441   3.0464
ADVA:  (20101,) (35469,) 0.5667202345710338
ADV1:  0.00025369531833173767 -0.0007234414282200874 0.010446899975957949 0.05575665181901479 -0.07335797237823768
ADVB:  (20408,) (35469,) 0.5753756801714173
ADV2:  0.14061937533626792 0.3546935527922826 0.47397103049114253 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0753   0.5013   2.2376  44.3057  15.5506  10.7326
***** Episode 27154, Mean R = -12.9  Std R = 5.7  Min R = -26.7
PolicyLoss: 2.17
Policy_Entropy: 0.173
Policy_KL: 0.00449
Policy_SD: 0.545
Steps: 1.2e+04
TotalSteps: 9.65e+06
VF_0_ExplainedVarNew: 0.945
VF_0_ExplainedVarOld: 0.935
VF_0_Loss : 0.000121


ValFun  G

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4013   0.6504   2.5121  44.3057  15.5506  10.7326
***** Episode 27371, Mean R = -13.5  Std R = 6.0  Min R = -32.3
PolicyLoss: 2.09
Policy_Entropy: 0.174
Policy_KL: 0.00669
Policy_SD: 0.554
Steps: 1.18e+04
TotalSteps: 9.74e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.000136


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0015   9.0125   2.2441   3.0464
ADVA:  (20719,) (35325,) 0.5865251238499646
ADV1:  0.0005118415237493016 -0.0002587062883023324 0.00881258608334173 0.07231195181427608 -0.1094523976843127
ADVB:  (20608,) (35325,) 0.583382873319179
ADV2:  0.13276886198337312 0.32612859550216183 0.4372925373424756 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4761   0.2099   0.9930  44.3057  15.5506  10.7326
***** Episode 27402, Mean R = -12.3  Std R = 5.7  Min R = -26.6
PolicyLoss: 1.97
Policy_Entropy: 0.173
Policy_KL: 0.00697
Policy_SD: 0.546
Steps: 1.17e+04
TotalStep

seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.99 |    0.99    0.99
cs_angles |  0.0015  0.0005 |  0.0770  0.0827 | -0.9793 -0.9919 |  0.9925  0.9860
optical_flow | -0.0001  0.0000 |  0.0252  0.0250 | -1.1042 -1.2279 |  1.1114  1.2429
v_err    | -0.0107 |  0.0591 | -0.4540 |  0.1107
landing_rewards |    9.13 |    2.82 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.02
tracking_rewards |  -17.72 |    4.51 |  -34.90 |  -10.41
steps    |     376 |      19 |     335 |     420
***** Episode 27650, Mean R = -13.9  Std R = 5.7  Min R = -25.4
PolicyLoss: 1.78
Policy_Entropy: 0.173
Policy_KL: 0.00428
Policy_SD: 0.547
Steps: 1.17e+04
TotalSteps: 9.84e+06
VF_0_ExplainedVarNew: 0.952
VF_0_ExplainedVarOld: 0.95
VF_0_Loss : 0.000122


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0010   0.0038   9.0125   2.2441   3.0464
ADVA:  (20997,) (35092,) 0.5983415023367149
ADV1:  0.0006644658866261305 -0.0003289075918068819 0.009751050229732027 0.05791

attitude |   -0.06   -0.04    0.18 |    1.16    0.63    1.86 |   -3.14   -1.55   -3.14 |    3.14    1.55    3.14
w        |    0.00    0.00   -0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.03    0.13 |    0.63    1.87 |   -1.55   -3.13 |    1.41    3.14
w_f      |    0.01    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.03 |    0.04    0.02    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.25 |    0.00 |    1.34
seeker_angles |   -0.00    0.00 |    0.08    0.08 |   -0.98   -0.99 |    1.00    0.99
cs_angles | -0.0003  0.0003 |  0.0801  0.0809 | -0.9820 -0.9940 |  0.9953  0.9923
optical_flow |  0.0000  0.0000 |  0.0248  0.0240 | -1.0893 -1.2627 |  1.2408  1.1488
v_err    | -0.0107 |  0.0590 | -0.4538 |  0.1224
landing_rewards |    9.26 |    2.62 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (21977,) (35311,) 0.6223839596726233
ADV1:  -0.0003901676803432057 -0.0014235877478337303 0.01060879931916912 0.051150348398524004 -0.07388797059844399
ADVB:  (14367,) (35311,) 0.4068703803347399
ADV2:  0.0 0.3049633484223863 0.5268418677564342 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.5834   1.1024   5.3591 101.1911  36.8110  32.5770
Update Cnt = 910    ET =   1530.4   Stats:  Mean, Std, Min, Max
r_f      |  -23.33    5.84   12.17 |  165.70  166.81  211.65 | -388.16 -384.71 -379.47 |  383.94  391.82  386.33
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.11   -0.10 |    0.11    0.10    0.11
r_i      |  -93.58   -1.56   44.97 |  630.51  643.24  805.87 |-1327.99-1263.46-1374.88 | 1218.82 1303.31 1325.92
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.10    0.10    0.09
norm_rf  |    0.24 |    0.10 |    0.06 |    0.51
norm_vf  |    0.08 |    0.02 |    0.04 |    0.14
gs_f     |    1.45 |   

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0018   9.0125   2.2441   3.0464
ADVA:  (20488,) (35275,) 0.5808079376328845
ADV1:  0.0025523167718896032 0.0022630547097363435 0.008055028455274643 0.04952453638504109 -0.09242162856534941
ADVB:  (23792,) (35275,) 0.6744720056697378
ADV2:  0.3586435380670078 0.5405196367977246 0.6145640394262833 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2682   0.0934   0.4932 101.1911  36.8110  32.5770
***** Episode 28518, Mean R = -12.9  Std R = 4.3  Min R = -21.7
PolicyLoss: 2.81
Policy_Entropy: 0.174
Policy_KL: 0.00311
Policy_SD: 0.548
Steps: 1.17e+04
TotalSteps: 1.02e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 6.28e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0013   0.0053   9.0125   2.2441   3.0464
ADVA:  (21081,) (35129,) 0.6001024794329471
ADV1:  0.0010010343010131064 -6.390261351434004e-05 0.009317009552066787 0.04848665098349258 -0.09242162856534941
ADVB:

***** Episode 28735, Mean R = -15.4  Std R = 10.5  Min R = -60.4
PolicyLoss: 1.94
Policy_Entropy: 0.175
Policy_KL: 0.00375
Policy_SD: 0.549
Steps: 1.16e+04
TotalSteps: 1.03e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.953
VF_0_Loss : 0.000222


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0019   0.0067   9.0125   2.2441   3.0464
ADVA:  (21151,) (35193,) 0.601000198903191
ADV1:  0.00023537117684416936 -0.0009970160124961908 0.011048312150841353 0.0848662987527738 -0.06928302709381484
ADVB:  (19733,) (35193,) 0.5607080953598726
ADV2:  0.0892227705186337 0.3060677025195785 0.4594763914502523 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2396   0.0967   0.4606 101.1911  36.8110  32.5770
***** Episode 28766, Mean R = -14.9  Std R = 7.2  Min R = -34.4
PolicyLoss: 1.9
Policy_Entropy: 0.175
Policy_KL: 0.00446
Policy_SD: 0.556
Steps: 1.17e+04
TotalSteps: 1.03e+07
VF_0_ExplainedVarNew: 0.946
VF_0_ExplainedVarOld: 0.941
VF_0_Loss : 0.000175


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3539   0.1441   0.7287 101.1911  36.8110  32.5770
***** Episode 28983, Mean R = -14.1  Std R = 5.7  Min R = -28.7
PolicyLoss: 1.98
Policy_Entropy: 0.175
Policy_KL: 0.00572
Policy_SD: 0.558
Steps: 1.16e+04
TotalSteps: 1.03e+07
VF_0_ExplainedVarNew: 0.954
VF_0_ExplainedVarOld: 0.95
VF_0_Loss : 0.000119


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0008   0.0033   9.0125   2.2441   3.0464
ADVA:  (19905,) (35028,) 0.5682596779719082
ADV1:  0.0011935092238249054 0.00019064444074505464 0.009933586632962163 0.08008081389819477 -0.08467209054539832
ADVB:  (21872,) (35028,) 0.6244147539111568
ADV2:  0.21433033924929631 0.3859817626975484 0.4831123172056076 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2505   0.0879   0.4590 101.1911  36.8110  32.5770
***** Episode 29014, Mean R = -15.0  Std R = 5.7  Min R = -28.5
PolicyLoss: 2.17
Policy_Entropy: 0.175
Policy_KL: 0.00434
Policy_SD: 0.569
Steps: 1.16e+04
TotalSt

ADVA:  (23135,) (35253,) 0.6562562051456614
ADV1:  0.002138464308956964 0.0007446578792179204 0.010485893478948941 0.0722382459458486 -0.06679774531068572
ADVB:  (21699,) (35253,) 0.6155220832269594
ADV2:  0.2066449036215099 0.37972856265317284 0.48123541058841307 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6439   0.2702   1.2140 101.1911  36.8110  32.5770
***** Episode 29231, Mean R = -11.9  Std R = 4.6  Min R = -22.7
PolicyLoss: 2.15
Policy_Entropy: 0.177
Policy_KL: 0.00456
Policy_SD: 0.541
Steps: 1.21e+04
TotalSteps: 1.04e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 5.58e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0021   0.0079   9.0125   2.2441   3.0464
ADVA:  (22742,) (35142,) 0.6471458653463092
ADV1:  0.003368457781693911 0.0021468042369032075 0.009618554653609838 0.05445678359217557 -0.07103185006339868
ADVB:  (24429,) (35142,) 0.6951511012463719
ADV2:  0.36486278552327034 0.4836961964179544 0.5200681672540086 3

w        |    0.00    0.00   -0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.03   -0.01 |    0.68    1.89 |   -1.35   -3.14 |    1.45    3.13
w_f      |    0.01    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.03 |    0.05    0.02    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.26 |    0.00 |    1.52
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -0.99   -0.98 |    1.00    1.00
cs_angles |  0.0013 -0.0021 |  0.0788  0.0829 | -0.9944 -0.9782 |  0.9994  0.9975
optical_flow | -0.0001 -0.0000 |  0.0257  0.0239 | -1.0892 -1.2963 |  1.3093  1.2303
v_err    | -0.0103 |  0.0587 | -0.4535 |  0.1151
landing_rewards |    9.16 |    2.77 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.05 |    0.03
tracking_rewards |  -18.09 |    5.94 |  -60.24 |  -10.86
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6503   0.2326   1.1694 101.1911  36.8110  32.5770
Update Cnt = 960    ET =   1387.2   Stats:  Mean, Std, Min, Max
r_f      |   15.20   -1.85   -1.82 |  187.49  175.31  199.09 | -392.41 -376.21 -398.25 |  393.75  388.25  385.00
v_f      |   -0.01   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.09    0.10    0.10
r_i      |   61.33    3.72  -31.93 |  703.26  661.85  759.84 |-1337.94-1320.61-1307.28 | 1347.19 1303.64 1279.13
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.25 |    0.10 |    0.06 |    0.66
norm_vf  |    0.08 |    0.01 |    0.03 |    0.13
gs_f     |    1.41 |    2.50 |    0.01 |   27.31
thrust   |   -0.00   -0.00    0.00 |    0.68    0.68    0.68 |   -3.43   -3.44   -3.31 |    3.30    3.46    3.41
norm_thrust |    0.92 |    0.74 |    0.00 |    3.46
fuel     |    1.59 |    0.22 |    1.09 |    2.76
rewards  |  -13.69 |

ADVA:  (20416,) (35286,) 0.578586408207221
ADV1:  0.000609107003104542 5.022653139785265e-05 0.00884548367719857 0.09023420949477118 -0.08523029327392578
ADVB:  (20829,) (35286,) 0.5902907668763816
ADV2:  0.15395321152088487 0.35404182522148764 0.4717888835858502 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7386   0.3893   1.5976 101.1911  36.8110  32.5770
***** Episode 30068, Mean R = -12.2  Std R = 4.2  Min R = -26.6
PolicyLoss: 2.06
Policy_Entropy: 0.18
Policy_KL: 0.00575
Policy_SD: 0.555
Steps: 1.17e+04
TotalSteps: 1.08e+07
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.000482


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0003   9.0125   2.2441   3.0464
ADVA:  (22148,) (35247,) 0.6283655346554317
ADV1:  0.001096206896746903 -0.00015120080006083078 0.009230876681627753 0.040467128672272756 -0.08523029327392578
ADVB:  (21201,) (35247,) 0.6014979998297727
ADV2:  0.16305947206577523 0.3238276733830881 0.41289799082643713

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0021   9.0125   2.2441   3.0464
ADVA:  (21519,) (35192,) 0.6114741986815185
ADV1:  0.00156770012237944 0.00027192024182824947 0.00998105092386749 0.06118199299406629 -0.07033236359564421
ADVB:  (22083,) (35192,) 0.6275005683109798
ADV2:  0.20686805940394215 0.36790798275204106 0.4512314535486322 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4587   0.1749   0.8330 101.1911  36.8110  32.5770
***** Episode 30316, Mean R = -13.8  Std R = 6.3  Min R = -31.6
PolicyLoss: 2.03
Policy_Entropy: 0.18
Policy_KL: 0.00462
Policy_SD: 0.545
Steps: 1.19e+04
TotalSteps: 1.08e+07
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.000327


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0012   0.0046   9.0125   2.2441   3.0464
ADVA:  (22317,) (35195,) 0.6340957522375338
ADV1:  0.000537411135886675 -0.00020117939064466278 0.009267552642921806 0.06230641631154282 -0.07066923191635538
ADVB:  

***** Episode 30533, Mean R = -15.8  Std R = 12.1  Min R = -73.1
PolicyLoss: 1.81
Policy_Entropy: 0.181
Policy_KL: 0.00511
Policy_SD: 0.549
Steps: 1.17e+04
TotalSteps: 1.09e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.00187


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0038   9.0125   2.2441   3.0464
ADVA:  (21955,) (35271,) 0.6224660485951632
ADV1:  0.0003679602100822507 -0.0007731612893925668 0.00962557199762452 0.06697713257131627 -0.07975168079612205
ADVB:  (19090,) (35271,) 0.5412378441212328
ADV2:  0.06884859221995272 0.28843935732927495 0.4157746582879679 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9796   0.3502   1.6504 101.1911  36.8110  32.5770
***** Episode 30564, Mean R = -13.5  Std R = 7.3  Min R = -35.7
PolicyLoss: 1.83
Policy_Entropy: 0.181
Policy_KL: 0.00463
Policy_SD: 0.549
Steps: 1.17e+04
TotalSteps: 1.09e+07
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.957
VF_0_Loss : 0.00279


ValFun  Gr

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3781   0.1355   0.6001 101.1911  36.8110  32.5770
***** Episode 30781, Mean R = -12.1  Std R = 7.2  Min R = -33.6
PolicyLoss: 2.01
Policy_Entropy: 0.182
Policy_KL: 0.00489
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 1.1e+07
VF_0_ExplainedVarNew: 0.962
VF_0_ExplainedVarOld: 0.957
VF_0_Loss : 0.00115


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0062   0.0037   0.0129   9.0125   2.2441   3.0464
ADVA:  (22116,) (35008,) 0.6317413162705667
ADV1:  0.0 -0.0017076875834282823 0.010846831321765631 0.06772701082436372 -0.08954976773839979
ADVB:  (16297,) (35008,) 0.4655221663619744
ADV2:  0.0 0.23032225359011935 0.3878332693617229 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3685   0.1590   0.6858 101.1911  36.8110  32.5770
***** Episode 30812, Mean R = -14.9  Std R = 6.3  Min R = -26.5
PolicyLoss: 1.7
Policy_Entropy: 0.182
Policy_KL: 0.00471
Policy_SD: 0.537
Steps: 1.19e+04
TotalSteps: 1.1e+07
VF_0_ExplainedVarNew: 

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.03    0.01 |    0.63    1.84 |   -1.50   -3.11 |    1.40    3.10
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.03 |    0.04    0.02    0.05
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.26 |    0.00 |    1.48
seeker_angles |   -0.00   -0.00 |    0.08    0.08 |   -0.99   -0.95 |    0.99    0.99
cs_angles | -0.0008 -0.0011 |  0.0770  0.0828 | -0.9864 -0.9545 |  0.9861  0.9940
optical_flow | -0.0002  0.0001 |  0.0238  0.0229 | -1.1902 -0.8638 |  1.0512  0.9563
v_err    | -0.0106 |  0.0592 | -0.4535 |  0.1160
landing_rewards |    9.06 |    2.91 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -17.70 |    4.57 |  -34.24 |   -9.11
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3269   0.4799   2.2099 101.1911  36.8110  32.5770
Update Cnt = 1010    ET =   1393.1   Stats:  Mean, Std, Min, Max
r_f      |    7.17   -6.29    2.83 |  180.05  179.68  190.32 | -394.09 -376.84 -382.52 |  399.85  380.82  380.05
v_f      |   -0.00   -0.00   -0.00 |    0.05    0.05    0.05 |   -0.11   -0.12   -0.09 |    0.12    0.09    0.10
r_i      |   25.14  -25.43   39.10 |  682.64  687.32  735.34 |-1331.17-1300.22-1306.60 | 1339.07 1337.90 1300.79
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.08    0.09    0.09
norm_rf  |    0.24 |    0.10 |    0.03 |    0.55
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs_f     |    1.01 |    1.15 |    0.01 |   10.34
thrust   |   -0.00    0.00   -0.00 |    0.67    0.67    0.67 |   -3.25   -3.37   -3.42 |    3.29    3.08    3.46
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.56 |    0.20 |    1.09 |    2.20
rewards  |  -12.66 

ADVA:  (22017,) (35206,) 0.6253763563029029
ADV1:  0.0005436035837340231 -0.0008992995240108135 0.009428533209012394 0.043678715746784835 -0.05660187662736172
ADVB:  (20471,) (35206,) 0.5814633869226836
ADV2:  0.12149024243769692 0.2893084134829005 0.39191173610500835 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0708   0.5060   2.2313 101.1911  36.8110  32.5770
***** Episode 31618, Mean R = -11.8  Std R = 4.7  Min R = -22.1
PolicyLoss: 1.69
Policy_Entropy: 0.183
Policy_KL: 0.00377
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 1.13e+07
VF_0_ExplainedVarNew: 0.942
VF_0_ExplainedVarOld: 0.938
VF_0_Loss : 0.000157


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0013   0.0044   9.0125   2.2441   3.0464
ADVA:  (22821,) (35606,) 0.6409313037128573
ADV1:  0.0007728552543923911 -0.0007347835799767454 0.009975335272859853 0.046348106631634495 -0.06397597009812223
ADVB:  (20979,) (35606,) 0.5891984496994889
ADV2:  0.1396170929218336 0.3084235166055654 0.413634443868

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0026   9.0125   2.2441   3.0464
ADVA:  (21892,) (35220,) 0.6215786484951732
ADV1:  0.002197279266168025 0.00018344474890935418 0.011992276887257803 0.0755874934361177 -0.07760917687927751
ADVB:  (21942,) (35220,) 0.6229982964224873
ADV2:  0.2332925212166417 0.3801334733936753 0.4548077759234172 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7311   0.7356   2.8959 101.1911  36.8110  32.5770
***** Episode 31866, Mean R = -13.4  Std R = 5.0  Min R = -21.8
PolicyLoss: 2.1
Policy_Entropy: 0.183
Policy_KL: 0.00642
Policy_SD: 0.548
Steps: 1.16e+04
TotalSteps: 1.14e+07
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.000117


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0032   9.0125   2.2441   3.0464
ADVA:  (20626,) (35198,) 0.5859992045002557
ADV1:  0.002422731467344952 0.0014615285028231656 0.009892476495846264 0.0755874934361177 -0.07760917687927751
ADVB:  (23

***** Episode 32083, Mean R = -11.7  Std R = 5.1  Min R = -24.3
PolicyLoss: 1.92
Policy_Entropy: 0.184
Policy_KL: 0.00414
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 1.15e+07
VF_0_ExplainedVarNew: 0.955
VF_0_ExplainedVarOld: 0.953
VF_0_Loss : 0.000165


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0044   0.0030   0.0102   9.0125   2.2441   3.0464
ADVA:  (22196,) (35075,) 0.632815395580898
ADV1:  0.0010414810339315506 -0.0008818206567814166 0.011322525542891418 0.09724800867128908 -0.08729509142852038
ADVB:  (20220,) (35075,) 0.5764789736279401
ADV2:  0.1365977336054336 0.31592568051591524 0.4125628177291764 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2839   0.0936   0.4832 101.1911  36.8110  32.5770
***** Episode 32114, Mean R = -14.0  Std R = 7.5  Min R = -34.5
PolicyLoss: 1.86
Policy_Entropy: 0.184
Policy_KL: 0.00407
Policy_SD: 0.544
Steps: 1.15e+04
TotalSteps: 1.15e+07
VF_0_ExplainedVarNew: 0.948
VF_0_ExplainedVarOld: 0.94
VF_0_Loss : 0.000282


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6601   0.2639   1.3520 101.1911  36.8110  32.5770
***** Episode 32331, Mean R = -11.6  Std R = 6.1  Min R = -35.4
PolicyLoss: 2.18
Policy_Entropy: 0.185
Policy_KL: 0.00426
Policy_SD: 0.546
Steps: 1.18e+04
TotalSteps: 1.16e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.00088


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0030   9.0125   2.2441   3.0464
ADVA:  (19720,) (35102,) 0.5617913509201755
ADV1:  0.0022284557423996243 0.001312896518051713 0.00898438763574712 0.05563512289982436 -0.11067292473604795
ADVB:  (24345,) (35102,) 0.6935502250584012
ADV2:  0.37380344963150114 0.491406369904409 0.530627009441429 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4353   0.1822   0.8601 101.1911  36.8110  32.5770
***** Episode 32362, Mean R = -11.6  Std R = 5.8  Min R = -27.0
PolicyLoss: 2.42
Policy_Entropy: 0.185
Policy_KL: 0.00478
Policy_SD: 0.541
Steps: 1.15e+04
TotalSteps: 

seeker_angles |   -0.00   -0.00 |    0.08    0.08 |   -1.00   -0.99 |    1.00    0.98
cs_angles | -0.0006 -0.0018 |  0.0779  0.0823 | -0.9977 -0.9938 |  0.9987  0.9831
optical_flow |  0.0000 -0.0000 |  0.0230  0.0229 | -0.9608 -1.0550 |  0.9956  1.0955
v_err    | -0.0111 |  0.0595 | -0.4524 |  0.1112
landing_rewards |    8.84 |    3.20 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.04
tracking_rewards |  -17.46 |    4.68 |  -35.66 |   -9.74
steps    |     377 |      20 |     333 |     420
***** Episode 32610, Mean R = -13.4  Std R = 6.2  Min R = -31.5
PolicyLoss: 2.2
Policy_Entropy: 0.185
Policy_KL: 0.00554
Policy_SD: 0.552
Steps: 1.17e+04
TotalSteps: 1.17e+07
VF_0_ExplainedVarNew: 0.954
VF_0_ExplainedVarOld: 0.951
VF_0_Loss : 0.000275


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0027   9.0125   2.2441   3.0464
ADVA:  (21682,) (35169,) 0.6165088572322216
ADV1:  0.0010610875969558047 -0.00023568342553600619 0.010059897935981462 0.0640

w        |    0.00    0.00   -0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.01    0.19 |    0.66    1.96 |   -1.52   -3.13 |    1.43    3.14
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.03 |    0.05    0.02    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.25 |    0.00 |    1.60
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.96   -0.99 |    0.99    0.99
cs_angles |  0.0001  0.0003 |  0.0766  0.0816 | -0.9627 -0.9851 |  0.9896  0.9866
optical_flow |  0.0001  0.0000 |  0.0230  0.0229 | -1.0938 -0.9768 |  1.1262  1.0521
v_err    | -0.0110 |  0.0587 | -0.4518 |  0.1094
landing_rewards |    9.26 |    2.62 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.05
tracking_rewards |  -17.25 |    4.50 |  -34.06 |   -9.78
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3386   0.1327   0.6791 101.1911  36.8110  32.5770
Update Cnt = 1070    ET =   1323.2   Stats:  Mean, Std, Min, Max
r_f      |    2.86  -16.79  -20.54 |  185.04  165.82  202.68 | -390.70 -367.13 -395.58 |  393.04  386.71  368.26
v_f      |    0.00    0.01    0.01 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.11 |    0.12    0.10    0.13
r_i      |   12.08  -67.08  -47.68 |  669.55  648.13  774.54 |-1280.04-1311.58-1301.85 | 1350.93 1330.81 1324.93
v_i      |   -0.00    0.01    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.20 |    0.08 |    0.02 |    0.51
norm_vf  |    0.08 |    0.01 |    0.05 |    0.15
gs_f     |    1.36 |    1.98 |    0.01 |   17.35
thrust   |   -0.00    0.00    0.00 |    0.66    0.68    0.67 |   -3.13   -3.21   -3.43 |    3.40    3.46    3.28
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.53 |    0.19 |    1.12 |    2.15
rewards  |  -12.63 

ADVA:  (19647,) (34967,) 0.561872622758601
ADV1:  0.0 -0.0006074356833528476 0.008017535025799475 0.052803577088784914 -0.09256270276879708
ADVB:  (18058,) (34967,) 0.5164297766465524
ADV2:  0.03039721317551243 0.28236630419719183 0.42696957801003527 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3369   0.1171   0.6728 101.1911  36.8110  32.5770
***** Episode 33478, Mean R = -13.6  Std R = 5.7  Min R = -28.5
PolicyLoss: 1.84
Policy_Entropy: 0.187
Policy_KL: 0.00493
Policy_SD: 0.535
Steps: 1.17e+04
TotalSteps: 1.2e+07
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.00106


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0021   9.0125   2.2441   3.0464
ADVA:  (21863,) (35139,) 0.6221861749053758
ADV1:  0.0003319014292668034 -0.0005411853497105446 0.008841714598487567 0.04372806249416783 -0.09256270276879708
ADVB:  (18233,) (35139,) 0.5188821537323202
ADV2:  0.03398491767598163 0.2734986005280241 0.40626163291635303 3.0 0.0
Policy

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0053   0.0034   0.0119   9.0125   2.2441   3.0464
ADVA:  (19342,) (35350,) 0.5471570014144271
ADV1:  0.003363630806695981 0.0019832798470192704 0.010068070955119295 0.05439712137224839 -0.07984480621863964
ADVB:  (25188,) (35350,) 0.7125318246110325
ADV2:  0.4746364792043129 0.5673454318762938 0.5664081112015119 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.1170   3.4140  12.2151 101.1911  36.8110  32.5770
***** Episode 33726, Mean R = -11.4  Std R = 5.3  Min R = -26.1
PolicyLoss: 2.71
Policy_Entropy: 0.187
Policy_KL: 0.00728
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps: 1.21e+07
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 7.66e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0010   0.0037   9.0125   2.2441   3.0464
ADVA:  (18972,) (35343,) 0.5367965367965368
ADV1:  0.0008393743392427801 0.0003512878706326268 0.009977656162378275 0.05439712137224839 -0.07984480621863964
ADVB:  

***** Episode 33943, Mean R = -12.5  Std R = 5.6  Min R = -31.3
PolicyLoss: 2.19
Policy_Entropy: 0.186
Policy_KL: 0.00406
Policy_SD: 0.549
Steps: 1.18e+04
TotalSteps: 1.22e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.000236


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0015   9.0125   2.2441   3.0464
ADVA:  (22650,) (35171,) 0.6439964743680874
ADV1:  0.001985220742541592 0.00047510056186253897 0.010050868568716217 0.05461536082759155 -0.09953583399474136
ADVB:  (22653,) (35171,) 0.6440817719143612
ADV2:  0.2357970793034197 0.3691382786251348 0.444583590238004 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5792   0.2812   1.1007 101.1911  36.8110  32.5770
***** Episode 33974, Mean R = -10.6  Std R = 4.5  Min R = -21.8
PolicyLoss: 1.94
Policy_Entropy: 0.186
Policy_KL: 0.00419
Policy_SD: 0.545
Steps: 1.16e+04
TotalSteps: 1.22e+07
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 6.95e-05


ValFun  Grad

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2852   0.1028   0.5408 101.1911  36.8110  32.5770
***** Episode 34191, Mean R = -12.5  Std R = 5.2  Min R = -26.6
PolicyLoss: 1.58
Policy_Entropy: 0.188
Policy_KL: 0.0068
Policy_SD: 0.533
Steps: 1.18e+04
TotalSteps: 1.23e+07
VF_0_ExplainedVarNew: 0.957
VF_0_ExplainedVarOld: 0.953
VF_0_Loss : 0.000593


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0047   9.0125   2.2441   3.0464
ADVA:  (21240,) (35062,) 0.605784039701101
ADV1:  0.0 -0.002208554303207376 0.010196354923501863 0.06263607980680463 -0.06447085654082843
ADVB:  (17852,) (35062,) 0.509155210769494
ADV2:  0.014923395619099937 0.24209513062845164 0.3712322057947989 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3809   0.1197   0.6606 101.1911  36.8110  32.5770
***** Episode 34222, Mean R = -12.9  Std R = 5.6  Min R = -24.9
PolicyLoss: 1.6
Policy_Entropy: 0.188
Policy_KL: 0.00623
Policy_SD: 0.537
Steps: 1.16e+04
TotalSteps: 1.23e+07
VF_0_E

w        |    0.00    0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.05   -0.19 |    0.65    1.79 |   -1.51   -3.12 |    1.38    3.10
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.03 |    0.03    0.03    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.26 |    0.00 |    1.47
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -1.00 |    0.97    1.00
cs_angles |  0.0024  0.0010 |  0.0773  0.0798 | -0.9903 -0.9986 |  0.9740  0.9987
optical_flow | -0.0002  0.0000 |  0.0216  0.0240 | -0.9509 -1.0854 |  1.2596  1.0259
v_err    | -0.0109 |  0.0592 | -0.4532 |  0.1079
landing_rewards |    8.87 |    3.16 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -17.01 |    4.19 |  -29.74 |   -9.38
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8112   0.3793   1.6737 101.1911  36.8110  32.5770
Update Cnt = 1120    ET =   1281.4   Stats:  Mean, Std, Min, Max
r_f      |    0.79  -13.41   16.70 |  178.04  173.03  208.24 | -390.54 -390.18 -388.48 |  391.45  386.03  394.29
v_f      |   -0.00    0.01   -0.00 |    0.05    0.04    0.05 |   -0.09   -0.09   -0.12 |    0.09    0.11    0.11
r_i      |   28.10  -84.62   38.13 |  668.07  631.28  801.98 |-1320.03-1305.10-1310.35 | 1357.92 1288.11 1364.33
v_i      |   -0.00    0.01   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.08   -0.10 |    0.10    0.10    0.09
norm_rf  |    0.21 |    0.09 |    0.03 |    0.55
norm_vf  |    0.08 |    0.01 |    0.05 |    0.14
gs_f     |    1.27 |    1.42 |    0.01 |   10.92
thrust   |   -0.00   -0.00   -0.00 |    0.67    0.67    0.66 |   -3.33   -3.45   -3.28 |    3.46    3.44    3.43
norm_thrust |    0.89 |    0.74 |    0.00 |    3.46
fuel     |    1.52 |    0.19 |    0.98 |    2.23
rewards  |  -11.90 

ADVA:  (21763,) (35372,) 0.6152606581476875
ADV1:  0.0008981209948924918 -0.00019108525512531125 0.009032854837838073 0.08181963320673247 -0.06719439874437161
ADVB:  (20738,) (35372,) 0.5862829356553206
ADV2:  0.15224656593983113 0.33789105837624256 0.45550861887201327 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6517   0.2528   1.1123 101.1911  36.8110  32.5770
***** Episode 35028, Mean R = -12.0  Std R = 6.2  Min R = -26.5
PolicyLoss: 1.92
Policy_Entropy: 0.191
Policy_KL: 0.00479
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 1.26e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.000307


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0015   0.0055   9.0125   2.2441   3.0464
ADVA:  (21830,) (35188,) 0.6203819483914971
ADV1:  0.0005387389179257532 -0.0006214625258285225 0.009381324152903351 0.05022394114325912 -0.06719439874437161
ADVB:  (19476,) (35188,) 0.5534841423212459
ADV2:  0.08773952108562813 0.2944615472076348 0.423408116192

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0014   9.0125   2.2441   3.0464
ADVA:  (21710,) (35099,) 0.6185361406307872
ADV1:  0.000762709870177106 -0.0008092679737082286 0.010116642701108618 0.06470489872084956 -0.07792249638997445
ADVB:  (21359,) (35099,) 0.6085358557223852
ADV2:  0.17073680909159325 0.328287709595361 0.4206709939541322 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7341   0.2926   1.5434 101.1911  36.8110  32.5770
***** Episode 35276, Mean R = -13.3  Std R = 6.4  Min R = -29.3
PolicyLoss: 1.81
Policy_Entropy: 0.19
Policy_KL: 0.00461
Policy_SD: 0.544
Steps: 1.16e+04
TotalSteps: 1.27e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.959
VF_0_Loss : 9.36e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0016   0.0059   9.0125   2.2441   3.0464
ADVA:  (21710,) (34861,) 0.6227589570006598
ADV1:  0.001810700439507238 0.00023297616724446153 0.010130827530511735 0.04867450553418329 -0.07792249638997445
ADVB:  

***** Episode 35493, Mean R = -12.7  Std R = 5.3  Min R = -26.3
PolicyLoss: 1.91
Policy_Entropy: 0.19
Policy_KL: 0.00392
Policy_SD: 0.541
Steps: 1.15e+04
TotalSteps: 1.28e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.00204


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0050   9.0125   2.2441   3.0464
ADVA:  (21335,) (34778,) 0.6134625337857266
ADV1:  0.0009283258362647179 0.00031604506938839926 0.008433509836794244 0.07597455197954522 -0.1040894848778312
ADVB:  (20317,) (34778,) 0.584191155328081
ADV2:  0.14433345371400996 0.35727344334588307 0.47613830073675373 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2978   0.1387   0.6078 101.1911  36.8110  32.5770
***** Episode 35524, Mean R = -11.2  Std R = 4.5  Min R = -21.3
PolicyLoss: 2.05
Policy_Entropy: 0.19
Policy_KL: 0.00326
Policy_SD: 0.539
Steps: 1.16e+04
TotalSteps: 1.28e+07
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.00276


ValFun  Gradi

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0223   0.3241   1.6894 101.1911  36.8110  32.5770
***** Episode 35741, Mean R = -12.9  Std R = 5.5  Min R = -30.8
PolicyLoss: 2.16
Policy_Entropy: 0.191
Policy_KL: 0.00509
Policy_SD: 0.555
Steps: 1.17e+04
TotalSteps: 1.29e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.000216


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0020   0.0069   9.0125   2.2441   3.0464
ADVA:  (18437,) (35049,) 0.5260349795999886
ADV1:  0.002081589778814723 0.0011692505718220807 0.009696045823479658 0.06049915681898868 -0.10084047548028258
ADVB:  (24731,) (35049,) 0.7056121429998002
ADV2:  0.4036529141429653 0.5257050586295829 0.5528659629611178 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7440   0.3363   1.5124 101.1911  36.8110  32.5770
***** Episode 35772, Mean R = -11.5  Std R = 4.1  Min R = -19.4
PolicyLo

attitude |   -0.12   -0.05    0.13 |    1.25    0.66    1.93 |   -3.14   -1.56   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.05    0.07 |    0.66    1.94 |   -1.48   -3.12 |    1.53    3.11
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.02 |    0.05    0.03    0.03
w_rewards |   -0.00 |    0.00 |   -0.01 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.25 |    0.00 |    1.49
seeker_angles |   -0.00    0.00 |    0.07    0.08 |   -0.98   -0.97 |    0.96    0.99
cs_angles | -0.0001  0.0024 |  0.0738  0.0779 | -0.9806 -0.9673 |  0.9643  0.9899
optical_flow | -0.0001  0.0001 |  0.0216  0.0227 | -0.9309 -1.0807 |  1.1206  1.1124
v_err    | -0.0101 |  0.0592 | -0.4553 |  0.1193
landing_rewards |    9.19 |    2.72 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5506   0.2579   1.1292 101.1911  36.8110  32.5770
Update Cnt = 1170    ET =   1239.0   Stats:  Mean, Std, Min, Max
r_f      |   -0.92   -4.71   18.80 |  177.50  180.07  195.69 | -392.96 -397.07 -383.13 |  385.13  384.82  382.67
v_f      |    0.00    0.00   -0.00 |    0.04    0.05    0.05 |   -0.09   -0.11   -0.11 |    0.10    0.10    0.09
r_i      |   -8.04  -20.60   86.92 |  673.42  677.08  746.01 |-1368.39-1337.35-1312.99 | 1259.82 1351.88 1286.10
v_i      |    0.00    0.00   -0.01 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.20 |    0.08 |    0.02 |    0.64
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f     |    1.32 |    2.10 |    0.00 |   23.78
thrust   |    0.00   -0.00   -0.00 |    0.66    0.68    0.66 |   -3.27   -3.43   -3.45 |    3.44    3.46    3.44
norm_thrust |    0.88 |    0.74 |    0.00 |    3.46
fuel     |    1.54 |    0.19 |    1.11 |    2.62
rewards  |  -11.34 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8148   0.3772   1.6222 101.1911  36.8110  32.5770
***** Episode 36578, Mean R = -12.2  Std R = 6.0  Min R = -25.6
PolicyLoss: 1.81
Policy_Entropy: 0.19
Policy_KL: 0.00463
Policy_SD: 0.547
Steps: 1.16e+04
TotalSteps: 1.32e+07
VF_0_ExplainedVarNew: 0.944
VF_0_ExplainedVarOld: 0.94
VF_0_Loss : 0.000108


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0009   0.0032   9.0125   2.2441   3.0464
ADVA:  (23077,) (35010,) 0.6591545272779206
ADV1:  0.0015811628328876379 -3.872583403127807e-05 0.010109684986056026 0.04626504431168243 -0.07450176208875747
ADVB:  (21297,) (35010,) 0.608311910882605
ADV2:  0.17230786520489744 0.3388274617541412 0.44148641288901047 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7003   0.2752   1.3907 101.1911  36.8110  32.5770
***** Episode 36609, Mean R = -11.6  Std R = 5.6  Min R = -28.1
PolicyLoss: 1.86
Policy_Entropy: 0.19
Policy_KL: 0.00487
Policy_SD: 0.544
Steps: 1.17e+04
TotalStep

ADVA:  (21979,) (34983,) 0.6282765914872939
ADV1:  0.0013763375609876175 0.0001513717540936644 0.010195611915145475 0.0756990668302272 -0.08030565731286327
ADVB:  (21127,) (34983,) 0.6039219049252494
ADV2:  0.17952961375632254 0.3619167575881639 0.46903386429025923 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7587   0.2570   1.2456 101.1911  36.8110  32.5770
***** Episode 36826, Mean R = -11.8  Std R = 4.3  Min R = -22.6
PolicyLoss: 1.99
Policy_Entropy: 0.192
Policy_KL: 0.00814
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 1.33e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 5.63e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0016   9.0125   2.2441   3.0464
ADVA:  (20272,) (35187,) 0.5761218631881092
ADV1:  0.0019813804032703247 0.0017015889787728633 0.007697675578047637 0.0756990668302272 -0.06374435012648982
ADVB:  (23803,) (35187,) 0.6764714241054935
ADV2:  0.342180189039767 0.5132749841870526 0.5910283058176026 3.

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0057   0.0038   0.0141   9.0125   2.2441   3.0464
ADVA:  (17925,) (34887,) 0.5138017026399518
ADV1:  0.0011357350898575343 0.0010442529166320707 0.009196922744322465 0.07058659531087635 -0.07195513980566431
ADVB:  (23738,) (34887,) 0.6804253733482386
ADV2:  0.3391081274377114 0.5557233005309604 0.6431518938456571 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   9.5671   6.1881  22.5374 101.1911  36.8110  32.5770
***** Episode 37074, Mean R = -11.9  Std R = 5.2  Min R = -26.1
PolicyLoss: 2.72
Policy_Entropy: 0.192
Policy_KL: 0.00682
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 1.34e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000544


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0071   0.0047   0.0180   9.0125   2.2441   3.0464
ADVA:  (19150,) (34961,) 0.5477532107205172
ADV1:  0.0 -0.001444130122

***** Episode 37291, Mean R = -12.5  Std R = 7.0  Min R = -29.0
PolicyLoss: 1.61
Policy_Entropy: 0.192
Policy_KL: 0.00663
Policy_SD: 0.549
Steps: 1.15e+04
TotalSteps: 1.35e+07
VF_0_ExplainedVarNew: 0.937
VF_0_ExplainedVarOld: 0.933
VF_0_Loss : 0.000139


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0006   0.0024   9.0125   2.2441   3.0464
ADVA:  (21704,) (35029,) 0.6196009021096806
ADV1:  0.0 -0.0023677036349211145 0.011467581421144938 0.062402397536071075 -0.07645801974438848
ADVB:  (20030,) (35029,) 0.5718119272602701
ADV2:  0.09993592173043098 0.2755610795389066 0.3872650553588214 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6264   0.1971   1.0228 101.1911  36.8110  32.5770
***** Episode 37322, Mean R = -12.3  Std R = 4.7  Min R = -20.6
PolicyLoss: 1.6
Policy_Entropy: 0.192
Policy_KL: 0.00554
Policy_SD: 0.544
Steps: 1.19e+04
TotalSteps: 1.35e+07
VF_0_ExplainedVarNew: 0.945
VF_0_ExplainedVarOld: 0.939
VF_0_Loss : 0.00075


Dynamics: Max Disturbance (m

w        |    0.00    0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.06    0.05    0.05
a_f      |   -0.04    0.01 |    0.66    1.90 |   -1.53   -3.13 |    1.42    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.03 |    0.06    0.02    0.04
w_rewards |   -0.00 |    0.00 |   -0.07 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.25 |    0.00 |    1.40
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.95   -0.99 |    0.99    0.99
cs_angles |  0.0010  0.0001 |  0.0760  0.0784 | -0.9488 -0.9877 |  0.9862  0.9906
optical_flow | -0.0000  0.0001 |  0.0210  0.0219 | -1.0418 -1.1148 |  1.1433  0.9499
v_err    | -0.0108 |  0.0600 | -0.4555 |  0.1188
landing_rewards |    8.84 |    3.20 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -16.43 |    4.37 |  -34.04 |   -9.18
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3840   0.1676   0.7334 101.1911  36.8110  32.5770
Update Cnt = 1220    ET =   1162.6   Stats:  Mean, Std, Min, Max
r_f      |  -20.28   -9.26   -6.93 |  192.51  174.76  190.92 | -399.02 -379.80 -364.22 |  378.61  396.11  395.30
v_f      |    0.01    0.00   -0.00 |    0.05    0.05    0.05 |   -0.11   -0.10   -0.11 |    0.11    0.10    0.12
r_i      |  -81.27  -35.46   10.95 |  689.28  663.82  746.41 |-1309.90-1352.05-1327.02 | 1236.14 1247.26 1366.42
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.08   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.19 |    0.08 |    0.02 |    0.45
norm_vf  |    0.08 |    0.01 |    0.05 |    0.13
gs_f     |    1.21 |    1.60 |    0.00 |   12.40
thrust   |    0.00   -0.00    0.00 |    0.67    0.67    0.68 |   -2.98   -3.42   -3.44 |    3.44    3.29    3.28
norm_thrust |    0.89 |    0.75 |    0.00 |    3.46
fuel     |    1.56 |    0.19 |    1.14 |    2.23
rewards  |  -12.33 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6792   0.3219   1.4083 101.1911  36.8110  32.5770
***** Episode 38128, Mean R = -11.0  Std R = 4.8  Min R = -28.8
PolicyLoss: 1.83
Policy_Entropy: 0.193
Policy_KL: 0.00567
Policy_SD: 0.549
Steps: 1.18e+04
TotalSteps: 1.38e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 8.9e-05


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0014   9.0125   2.2441   3.0464
ADVA:  (20869,) (35210,) 0.5927009372337404
ADV1:  0.000988958623758111 0.00036488750811540613 0.00787425352773402 0.06400404883739114 -0.0725404598522986
ADVB:  (21545,) (35210,) 0.6119000284010224
ADV2:  0.19394489256049974 0.3818895572470188 0.49034231662425437 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3054   0.1252   0.6145 101.1911  36.8110  32.5770
***** Episode 38159, Mean R = -10.3  Std R = 3.6  Min R = -20.8
PolicyL

ADVA:  (20756,) (35311,) 0.5878054997026423
ADV1:  0.0009152532590007883 0.00036178218744135107 0.00879286226431256 0.04552562966812995 -0.058633136463180247
ADVB:  (21247,) (35311,) 0.6017105151369262
ADV2:  0.18730799107030346 0.4048737993735446 0.5254936267697505 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5937   0.2211   0.9673 101.1911  36.8110  32.5770
***** Episode 38376, Mean R = -10.9  Std R = 5.1  Min R = -23.9
PolicyLoss: 2.23
Policy_Entropy: 0.193
Policy_KL: 0.00527
Policy_SD: 0.552
Steps: 1.17e+04
TotalSteps: 1.39e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 5.97e-05


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0024   9.0125   2.2441   3.0464
ADVA:  (21376,) (35497,) 0.6021917345127757
ADV1:  0.00012466501625019166 -0.0005706812882254246 0.008289667515250442 0.04350477679456774 -0.05678897265056104
ADVB:  (1921

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0014   9.0125   2.2441   3.0464
ADVA:  (22058,) (34907,) 0.6319076403013723
ADV1:  0.0010279422065735937 -0.0002223661774817986 0.009397367593698056 0.050046398259042224 -0.09026035925198816
ADVB:  (20866,) (34907,) 0.5977597616523906
ADV2:  0.1514414471402869 0.3142818374787561 0.41393168637370753 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5968   0.2614   1.1746 101.1911  36.8110  32.5770
***** Episode 38624, Mean R = -13.0  Std R = 6.2  Min R = -29.8
PolicyLoss: 1.74
Policy_Entropy: 0.194
Policy_KL: 0.00516
Policy_SD: 0.549
Steps: 1.16e+04
TotalSteps: 1.4e+07
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.957
VF_0_Loss : 0.000106


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0023   9.0125   2.2441   3.0464
ADVA:  (22749,) (34898,) 0.6518711674021433
ADV1:  0.0010282664595887177 -0.0003611713055828016 0.010236490822248626 0.05787994128198454 -0.08123704902472972
ADV

***** Episode 38841, Mean R = -11.4  Std R = 3.8  Min R = -19.1
PolicyLoss: 1.99
Policy_Entropy: 0.193
Policy_KL: 0.00585
Policy_SD: 0.557
Steps: 1.19e+04
TotalSteps: 1.41e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000171


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0002   0.0008   9.0125   2.2441   3.0464
ADVA:  (21817,) (35527,) 0.6140963211078898
ADV1:  0.0015458223740255402 0.0004212151978411353 0.009029170905700786 0.03923104826679258 -0.06128473828920989
ADVB:  (22121,) (35527,) 0.622653193345906
ADV2:  0.2058070808253185 0.36393825175930405 0.445576441995172 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2229   0.0774   0.3633 101.1911  36.8110  32.5770
***** Episode 38872, Mean R = -11.0  Std R = 5.5  Min R = -24.9
PolicyLoss: 1.94
Policy_Entropy: 0.194
Policy_KL: 0.00648
Policy_SD: 0.55
Steps: 1.19e+04
TotalSteps: 1.41e+07
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.956
VF_0_Loss : 0.00049


ValFun  Gradien

cs_angles |  0.0009  0.0022 |  0.0735  0.0770 | -0.9976 -0.9684 |  0.9990  0.9992
optical_flow | -0.0000  0.0001 |  0.0208  0.0233 | -0.8747 -0.9121 |  1.1045  1.2178
v_err    | -0.0112 |  0.0599 | -0.4525 |  0.1010
landing_rewards |    8.97 |    3.04 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -15.91 |    4.24 |  -32.34 |   -9.61
steps    |     381 |      21 |     331 |     422
***** Episode 39120, Mean R = -12.2  Std R = 4.9  Min R = -23.5
PolicyLoss: 1.87
Policy_Entropy: 0.195
Policy_KL: 0.00755
Policy_SD: 0.546
Steps: 1.19e+04
TotalSteps: 1.42e+07
VF_0_ExplainedVarNew: 0.95
VF_0_ExplainedVarOld: 0.946
VF_0_Loss : 0.000127


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0057   9.0125   2.2441   3.0464
ADVA:  (21879,) (35376,) 0.6184701492537313
ADV1:  0.001235414275800267 9.59596676154715e-05 0.009912405592641413 0.06244913675068697 -0.06751467454718782
ADVB:  (21140,) (35376,) 0.5975802804161013
ADV2:  0.18

attitude |    0.03   -0.04    0.06 |    1.23    0.67    1.85 |   -3.14   -1.56   -3.14 |    3.14    1.56    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.05   -0.01 |    0.68    1.85 |   -1.56   -3.12 |    1.49    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.04    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.25 |    0.00 |    1.49
seeker_angles |   -0.00   -0.00 |    0.07    0.08 |   -0.97   -0.99 |    0.96    0.99
cs_angles | -0.0022 -0.0006 |  0.0741  0.0751 | -0.9676 -0.9877 |  0.9595  0.9868
optical_flow | -0.0000  0.0001 |  0.0194  0.0220 | -1.0660 -0.9997 |  0.8887  1.0719
v_err    | -0.0113 |  0.0600 | -0.4526 |  0.1128
landing_rewards |    8.81 |    3.24 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (21185,) (35436,) 0.5978383564736426
ADV1:  0.0008211752473799502 5.516146754415543e-05 0.00862506985386552 0.060961641183668924 -0.07680911591527656
ADVB:  (20730,) (35436,) 0.5849983068066373
ADV2:  0.14823550348485484 0.3397628479661607 0.45189237246930913 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7053   0.3196   1.4334 101.1911  36.8110  32.5770
Update Cnt = 1280    ET =   1273.5   Stats:  Mean, Std, Min, Max
r_f      |    3.65   -9.93    0.12 |  187.53  162.91  200.59 | -399.12 -367.51 -387.44 |  367.27  372.43  390.67
v_f      |   -0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.09   -0.09   -0.11 |    0.10    0.09    0.10
r_i      |    2.95  -38.02  -34.62 |  688.37  649.16  767.79 |-1342.93-1355.99-1291.47 | 1294.25 1290.83 1350.64
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.18 |    0.07 |    0.03 |    0.57
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f   

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0016   0.0060   9.0125   2.2441   3.0464
ADVA:  (21938,) (35264,) 0.6221075317604355
ADV1:  0.0005215301455066181 -0.0007636385756280741 0.009739483278507955 0.07499345351509457 -0.10411888662240398
ADVB:  (20333,) (35264,) 0.5765936932849365
ADV2:  0.12182289372166441 0.30461912011922176 0.40355766599392834 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6297   0.2779   1.2954 101.1911  36.8110  32.5770
***** Episode 39988, Mean R = -11.1  Std R = 5.8  Min R = -29.0
PolicyLoss: 1.74
Policy_Entropy: 0.194
Policy_KL: 0.00462
Policy_SD: 0.551
Steps: 1.16e+04
TotalSteps: 1.45e+07
VF_0_ExplainedVarNew: 0.936
VF_0_ExplainedVarOld: 0.931
VF_0_Loss : 0.000331


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0014   0.0048   9.0125   2.2441   3.0464
ADVA:  (20104,) (35023,) 0.5740227850269822
ADV1:  0.0 -0.00190659

***** Episode 40205, Mean R = -10.2  Std R = 4.2  Min R = -18.5
PolicyLoss: 1.85
Policy_Entropy: 0.196
Policy_KL: 0.00607
Policy_SD: 0.549
Steps: 1.15e+04
TotalSteps: 1.46e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.000202


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0060   9.0125   2.2441   3.0464
ADVA:  (20843,) (34894,) 0.5973233220610993
ADV1:  0.000510484848349535 -0.00030134255681105247 0.008033124856659558 0.07160044580634403 -0.07624581518447171
ADVB:  (19968,) (34894,) 0.572247377772683
ADV2:  0.12602659670034846 0.3265977301282776 0.4588235577429811 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5160   0.2259   0.9440 101.1911  36.8110  32.5770
***** Episode 40236, Mean R = -10.6  Std R = 4.2  Min R = -22.1
PolicyLoss: 1.87
Policy_Entropy: 0.196
Policy_KL: 0.00411
Policy_SD: 0.55
Steps: 1.18e+04
TotalSteps: 1.46e+07
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.000214


Dynamics: Ma

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3456   0.1400   0.6574 101.1911  36.8110  32.5770
***** Episode 40453, Mean R = -10.9  Std R = 6.1  Min R = -31.8
PolicyLoss: 2.06
Policy_Entropy: 0.195
Policy_KL: 0.00387
Policy_SD: 0.542
Steps: 1.16e+04
TotalSteps: 1.47e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000294


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0014   9.0125   2.2441   3.0464
ADVA:  (21094,) (35155,) 0.6000284454558384
ADV1:  0.001068413169222529 0.0004662809989680203 0.00713221818308796 0.05453554537358152 -0.07169275224208832
ADVB:  (21574,) (35155,) 0.6136822642582848
ADV2:  0.1877643350688248 0.37232396861248307 0.48112377197949513 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3403   0.1370   0.6193 101.1911  36.8110  32.5770
***** Episode 40484, Mean R = -9.1  Std R = 5.0  Min R = -19.6
PolicyLoss: 1.98
Policy_Entropy: 0.197
Policy_KL: 0.00512
Policy_SD: 0.546
Steps: 1.17e+04
TotalStep

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4678   0.1703   0.8233 101.1911  36.8110  32.5770
***** Episode 40701, Mean R = -9.5  Std R = 4.5  Min R = -20.4
PolicyLoss: 1.72
Policy_Entropy: 0.196
Policy_KL: 0.00532
Policy_SD: 0.547
Steps: 1.15e+04
TotalSteps: 1.48e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.000119


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0026   9.0125   2.2441   3.0464
ADVA:  (20645,) (34945,) 0.590785520103019
ADV1:  0.0009309841478731679 -9.613795253834794e-05 0.008819397087705306 0.08189521814522721 -0.07194715470240005
ADVB:  (21766,) (34945,) 0.6228645013592788
ADV2:  0.19155002918044906 0.34619408192555834 0.4340320791202486 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3964   0.1757   0.8340 101.1911  36.8110  32.5770
***** Episode 40732, Mean R = -10.2  Std R = 4.4  Min R = -20.5
PolicyLoss: 1.82
Policy_Entropy: 0.196
Policy_KL: 0.00439
Policy_SD: 0.542
Steps: 1.17e+04
TotalSt

seeker_angles |    0.00   -0.00 |    0.07    0.08 |   -0.99   -0.99 |    0.94    1.00
cs_angles |  0.0021 -0.0001 |  0.0740  0.0762 | -0.9902 -0.9934 |  0.9443  0.9996
optical_flow | -0.0001  0.0000 |  0.0209  0.0226 | -1.0783 -1.1025 |  0.9358  1.2078
v_err    | -0.0106 |  0.0599 | -0.4523 |  0.1131
landing_rewards |    8.71 |    3.35 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -15.44 |    4.17 |  -34.48 |   -8.54
steps    |     377 |      20 |     335 |     417
***** Episode 40980, Mean R = -10.5  Std R = 5.4  Min R = -24.0
PolicyLoss: 1.63
Policy_Entropy: 0.196
Policy_KL: 0.00658
Policy_SD: 0.549
Steps: 1.19e+04
TotalSteps: 1.49e+07
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.000361


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0024   9.0125   2.2441   3.0464
ADVA:  (21901,) (35445,) 0.6178868669770067
ADV1:  0.0010093032845621408 -0.000669882304784157 0.009518181297368835 0.07208

attitude |   -0.02   -0.01   -0.03 |    1.12    0.65    1.82 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.00   -0.04 |    0.66    1.82 |   -1.37   -3.13 |    1.53    3.13
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.03 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.25 |    0.00 |    1.39
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -1.00   -0.98 |    0.99    1.00
cs_angles |  0.0022  0.0010 |  0.0733  0.0804 | -0.9996 -0.9837 |  0.9948  0.9993
optical_flow |  0.0001 -0.0002 |  0.0206  0.0229 | -0.9228 -1.0236 |  1.0735  0.8759
v_err    | -0.0105 |  0.0592 | -0.4523 |  0.1167
landing_rewards |    9.23 |    2.67 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (21998,) (35213,) 0.624712464146764
ADV1:  0.000868695434606514 -0.0001052550104637019 0.009165935085161763 0.05798377683545419 -0.06749622092515328
ADVB:  (19917,) (35213,) 0.5656149717433903
ADV2:  0.10778761212680871 0.3241618057201544 0.45215715574808196 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6062   0.2202   1.0684 101.1911  36.8110  32.5770
Update Cnt = 1340    ET =   1372.0   Stats:  Mean, Std, Min, Max
r_f      |  -23.31   10.18  -13.89 |  181.62  164.07  202.49 | -391.48 -365.49 -389.08 |  381.89  375.04  397.49
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.12 |    0.09    0.09    0.10
r_i      |  -83.51   59.99  -64.54 |  664.85  633.97  787.74 |-1316.33-1243.76-1322.97 | 1330.21 1291.67 1272.09
v_i      |    0.01   -0.00    0.00 |    0.04    0.04    0.05 |   -0.08   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.17 |    0.07 |    0.02 |    0.42
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f    

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0023   9.0125   2.2441   3.0464
ADVA:  (22894,) (35196,) 0.6504716445050573
ADV1:  0.0017567422963148576 0.0004936732594654711 0.009249879151255153 0.05569411485608469 -0.07473598878496607
ADVB:  (21910,) (35196,) 0.6225139220365951
ADV2:  0.19580151128544993 0.35870884745771975 0.4425027912500382 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5047   0.2413   1.0472 101.1911  36.8110  32.5770
***** Episode 41848, Mean R = -11.8  Std R = 5.7  Min R = -26.9
PolicyLoss: 1.89
Policy_Entropy: 0.197
Policy_KL: 0.00513
Policy_SD: 0.549
Steps: 1.16e+04
TotalSteps: 1.52e+07
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.000242


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0013   0.0050   9.0125   2.2441   3.0464
ADVA:  (20735,) (35057,) 0.5914653278945717
ADV1:  0.0017023994158398

***** Episode 42065, Mean R = -9.6  Std R = 3.2  Min R = -19.6
PolicyLoss: 2.32
Policy_Entropy: 0.199
Policy_KL: 0.00564
Policy_SD: 0.555
Steps: 1.17e+04
TotalSteps: 1.53e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.000214


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0015   0.0059   9.0125   2.2441   3.0464
ADVA:  (17628,) (35071,) 0.5026375067719768
ADV1:  0.0 -0.00029185757290431125 0.0070924137225491434 0.037432061584419585 -0.06789113657839566
ADVB:  (20205,) (35071,) 0.5761170197599156
ADV2:  0.13956597959801997 0.36825104759662675 0.4951285493593141 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3291   0.5583   2.3438 101.1911  36.8110  32.5770
***** Episode 42096, Mean R = -10.5  Std R = 5.3  Min R = -26.4
PolicyLoss: 2.06
Policy_Entropy: 0.199
Policy_KL: 0.0056
Policy_SD: 0.552
Steps: 1.17e+04
TotalSteps: 1.53e+07
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.961
VF_0_Loss : 0.000463


Dynamics: Max Disturbance

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4921   0.7591   3.2793 101.1911  36.8110  32.5770
***** Episode 42313, Mean R = -10.6  Std R = 5.0  Min R = -21.2
PolicyLoss: 1.94
Policy_Entropy: 0.198
Policy_KL: 0.00611
Policy_SD: 0.546
Steps: 1.16e+04
TotalSteps: 1.54e+07
VF_0_ExplainedVarNew: 0.952
VF_0_ExplainedVarOld: 0.947
VF_0_Loss : 0.000205


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0039   9.0125   2.2441   3.0464
ADVA:  (20126,) (35242,) 0.5710799614096816
ADV1:  0.0011000312363095566 0.00020945106487259786 0.00891787484427877 0.07175763108274663 -0.09128722488439722
ADVB:  (22785,) (35242,) 0.6465297088700982
ADV2:  0.24185862561558513 0.39068245838278715 0.47046732394761703 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7005   0.3790   1.5559 101.1911  36.8110  32.5770
***** Episode 42344, Mean R = -9.7  Std R = 4.3  Min R = -21.2
PolicyLoss: 1.97
Policy_Entropy: 0.199
Policy_KL: 0.00491
Policy_SD: 0.537
Steps: 1.19e+04
TotalS

ADVA:  (19825,) (35260,) 0.5622518434486671
ADV1:  0.0013851153681698532 0.0010120289628468106 0.007520197447912013 0.07858074513075242 -0.06717406440961615
ADVB:  (22006,) (35260,) 0.6241066364152014
ADV2:  0.24256574021714936 0.43610495342249156 0.5354325395280457 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3931   0.7453   2.8958 101.1911  36.8110  32.5770
***** Episode 42561, Mean R = -11.1  Std R = 5.7  Min R = -31.0
PolicyLoss: 2.27
Policy_Entropy: 0.199
Policy_KL: 0.006
Policy_SD: 0.55
Steps: 1.17e+04
TotalSteps: 1.55e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.000566


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0023   9.0125   2.2441   3.0464
ADVA:  (20016,) (35389,) 0.5655994800644268
ADV1:  0.0006425560313329474 0.0004682907201219017 0.007424772040611516 0.0456084465867207 -0.06717406440961615
ADVB:  (19928,) (35389,) 0.5631128316708581
ADV2:  0.13388822175092147 0.38488650472371794 0.5182346509833021 3

attitude |   -0.19    0.01    0.17 |    1.17    0.68    1.80 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.01    0.20 |    0.67    1.79 |   -1.36   -3.13 |    1.46    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.04   -0.03 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.26 |    0.00 |    1.66
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.97   -0.96 |    0.98    0.98
cs_angles |  0.0017  0.0025 |  0.0755  0.0769 | -0.9721 -0.9566 |  0.9758  0.9806
optical_flow | -0.0000 -0.0001 |  0.0202  0.0228 | -0.9899 -1.1336 |  1.0418  1.1800
v_err    | -0.0112 |  0.0599 | -0.4531 |  0.1173
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3112   0.1241   0.5555 101.1911  36.8110  32.5770
Update Cnt = 1390    ET =   1428.2   Stats:  Mean, Std, Min, Max
r_f      |  -10.95   -7.94   22.42 |  184.39  180.24  191.52 | -388.60 -391.04 -347.87 |  383.20  373.80  388.04
v_f      |    0.00    0.00   -0.00 |    0.05    0.05    0.05 |   -0.09   -0.12   -0.10 |    0.12    0.13    0.10
r_i      |  -46.07  -26.75   62.99 |  672.00  696.74  741.65 |-1287.33-1322.55-1247.04 | 1353.79 1317.88 1337.84
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.17 |    0.07 |    0.03 |    0.45
norm_vf  |    0.08 |    0.01 |    0.03 |    0.14
gs_f     |    1.30 |    2.54 |    0.02 |   35.42
thrust   |   -0.00    0.00    0.00 |    0.67    0.68    0.68 |   -3.39   -3.27   -3.37 |    3.41    3.44    3.38
norm_thrust |    0.91 |    0.75 |    0.00 |    3.46
fuel     |    1.58 |    0.19 |    1.11 |    2.25
rewards  |  -10.70 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3131   0.1186   0.5832 101.1911  36.8110  32.5770
***** Episode 43398, Mean R = -10.5  Std R = 4.1  Min R = -18.2
PolicyLoss: 2.12
Policy_Entropy: 0.2
Policy_KL: 0.00459
Policy_SD: 0.551
Steps: 1.16e+04
TotalSteps: 1.58e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.000109


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0028   9.0125   2.2441   3.0464
ADVA:  (20430,) (34855,) 0.5861425907330369
ADV1:  0.0010763216634926184 0.0005628931439959065 0.008385280359894618 0.04510099911319809 -0.08054495616861451
ADVB:  (21788,) (34855,) 0.6251040022952231
ADV2:  0.2135198886766094 0.4147469713775942 0.519186898955156 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2402   0.0742   0.3876 101.1911  36.8110  32.5770
***** Episode 43429, Mean R = -12.9  Std R = 6.6  Min R = -28.7
PolicyLoss: 2.15
Policy_Entropy: 0.2
Policy_KL: 0.00465
Policy_SD: 0.556
Steps: 1.17e+04
TotalSteps: 1

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1163   0.4405   1.9640 101.1911  36.8110  32.5770
***** Episode 43646, Mean R = -11.0  Std R = 6.0  Min R = -33.6
PolicyLoss: 2.04
Policy_Entropy: 0.201
Policy_KL: 0.00588
Policy_SD: 0.564
Steps: 1.18e+04
TotalSteps: 1.59e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.00261


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0011   0.0039   9.0125   2.2441   3.0464
ADVA:  (19615,) (35432,) 0.5535956197787311
ADV1:  0.0005442151636746023 0.00012229219442777098 0.008115310442640248 0.054515845877779134 -0.1501557648871159
ADVB:  (21571,) (35432,) 0.6087999548430797
ADV2:  0.18744766328754153 0.40014993359495893 0.5200071841536646 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7060   0.3778   1.4241 101.1911  36.8110  32.5770
***** Episode 43677, Mean R = -11.2  Std R = 4.6  Min R = -23.4
PolicyLoss: 2.14
Policy_Entropy: 0.201
Policy_KL: 0.00639
Policy_SD: 0.561
Steps: 1.19e+04
TotalS

ADVA:  (19766,) (35283,) 0.5602131338038149
ADV1:  0.0 -0.00059475736798792 0.007923561171573626 0.04842320361695529 -0.0657153524583088
ADVB:  (18316,) (35283,) 0.5191168551427033
ADV2:  0.041461364452414405 0.3546501137294536 0.5016928732860545 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3466   0.1164   0.5448 101.1911  36.8110  32.5770
***** Episode 43894, Mean R = -12.3  Std R = 7.3  Min R = -33.7
PolicyLoss: 2.21
Policy_Entropy: 0.2
Policy_KL: 0.00837
Policy_SD: 0.552
Steps: 1.17e+04
TotalSteps: 1.6e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000231


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0015   9.0125   2.2441   3.0464
ADVA:  (21808,) (35200,) 0.6195454545454545
ADV1:  0.0011506734428578316 0.00047623835255260883 0.008265010376626592 0.04894450334615036 -0.07474467220762246
ADVB:  (20904,) (35200,) 0.5938636363636364
ADV2:  0.17814982406131025 0.38033590665569894 0.484460242433219 3.0 0.0
Policy  Grad

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0010   0.0044   9.0125   2.2441   3.0464
ADVA:  (20967,) (35112,) 0.5971462747778538
ADV1:  0.0 -0.00030401538363662874 0.009273263027012785 0.04294466046293527 -0.09253689205443866
ADVB:  (18044,) (35112,) 0.513898382319435
ADV2:  0.03235877271793981 0.3794213009024961 0.5541233676163767 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.4195   2.3270   9.6341 101.1911  36.8110  32.5770
***** Episode 44142, Mean R = -11.6  Std R = 6.0  Min R = -26.5
PolicyLoss: 2.41
Policy_Entropy: 0.2
Policy_KL: 0.0131
Policy_SD: 0.552
Steps: 1.18e+04
TotalSteps: 1.61e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.000722


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0012   9.0125   2.2441   3.0464
ADVA:  (23247,) (35473,) 0.6553435006906662
ADV1:  0.0 -0.001594384074328289 0.009163690629946333 0.06439294924977984 -0.09253689205443866
ADVB:  (15300,) (35473,) 0.4313139570941279

cs_angles |  0.0006  0.0025 |  0.0751  0.0769 | -0.9957 -0.9956 |  0.9615  0.9702
optical_flow |  0.0000 -0.0000 |  0.0205  0.0236 | -1.2590 -1.4005 |  0.9570  1.1224
v_err    | -0.0112 |  0.0600 | -0.4520 |  0.1289
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.03
tracking_rewards |  -15.84 |    4.34 |  -32.69 |   -8.05
steps    |     381 |      19 |     335 |     419
***** Episode 44390, Mean R = -9.8  Std R = 4.6  Min R = -23.2
PolicyLoss: 2.34
Policy_Entropy: 0.201
Policy_KL: 0.00428
Policy_SD: 0.55
Steps: 1.17e+04
TotalSteps: 1.62e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.00109


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0009   0.0029   9.0125   2.2441   3.0464
ADVA:  (18790,) (35068,) 0.5358161286643094
ADV1:  0.0 -0.0005862649021417721 0.009063753693168329 0.12094968401759526 -0.06643951602633358
ADVB:  (18709,) (35068,) 0.5335063305577735
ADV2:  0.06617210262006504 0

attitude |    0.06   -0.06   -0.06 |    1.15    0.64    1.82 |   -3.14   -1.55   -3.14 |    3.14    1.48    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.06   -0.16 |    0.64    1.82 |   -1.55   -3.14 |    1.37    3.14
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.04    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.26 |    0.00 |    1.42
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.99   -1.00 |    0.97    1.00
cs_angles |  0.0014  0.0010 |  0.0749  0.0779 | -0.9914 -0.9999 |  0.9719  0.9990
optical_flow | -0.0001  0.0001 |  0.0194  0.0230 | -1.2672 -1.1509 |  0.9824  1.1831
v_err    | -0.0114 |  0.0597 | -0.4525 |  0.1069
landing_rewards |    9.74 |    1.59 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4554   0.1355   0.6862 101.1911  36.8110  32.5770
Update Cnt = 1450    ET =   1492.1   Stats:  Mean, Std, Min, Max
r_f      |    9.34   10.59    5.13 |  190.75  166.42  195.42 | -394.96 -389.09 -360.93 |  399.71  373.90  388.82
v_f      |   -0.00    0.00   -0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.10    0.10    0.10
r_i      |   41.67    3.64   12.08 |  703.12  624.65  764.33 |-1323.35-1305.71-1300.02 | 1337.23 1285.24 1294.07
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.16 |    0.07 |    0.02 |    0.42
norm_vf  |    0.08 |    0.01 |    0.04 |    0.11
gs_f     |    1.37 |    1.85 |    0.01 |   13.97
thrust   |   -0.00   -0.00    0.00 |    0.66    0.68    0.67 |   -3.28   -3.45   -3.46 |    3.40    3.43    3.33
norm_thrust |    0.89 |    0.75 |    0.00 |    3.46
fuel     |    1.54 |    0.19 |    1.09 |    2.14
rewards  |  -10.74 

ADVA:  (22461,) (35273,) 0.6367760043092451
ADV1:  0.000950598296385597 -0.00027513206748047475 0.008739984787117818 0.06586972047364847 -0.06819817497813252
ADVB:  (21171,) (35273,) 0.6002041221330763
ADV2:  0.16152345546185065 0.3234431976831174 0.40796172933931113 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3512   0.1340   0.5725 101.1911  36.8110  32.5770
***** Episode 45258, Mean R = -10.9  Std R = 6.4  Min R = -28.7
PolicyLoss: 1.72
Policy_Entropy: 0.204
Policy_KL: 0.00604
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 1.65e+07
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.00022


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0040   0.0025   0.0086   9.0125   2.2441   3.0464
ADVA:  (20672,) (35128,) 0.5884764290594398
ADV1:  0.0022293760591584836 0.001318933349918138 0.008173951291113295 0.06586972047364847 -0.06614161689765641
ADVB:  (25173,)

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0038   9.0125   2.2441   3.0464
ADVA:  (20157,) (35074,) 0.5746992073900895
ADV1:  0.00021729332208847256 -0.00022073511000825292 0.00814349084055462 0.059362180083701366 -0.08859887782548304
ADVB:  (19630,) (35074,) 0.5596738324684952
ADV2:  0.11001190865494852 0.3463585121059265 0.47855956011055584 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2698   0.0927   0.4709 101.1911  36.8110  32.5770
***** Episode 45506, Mean R = -11.0  Std R = 5.6  Min R = -27.6
PolicyLoss: 1.97
Policy_Entropy: 0.203
Policy_KL: 0.00622
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 1.66e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.000495


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0016   0.0058   9.0125   2.2441   3.0464
ADVA:  (20774,) (35217,) 0.5898855666297527
ADV1:  0.0 -0.00040546235071203904 0.007963595518899238 0.059362180083701366 -0.08676812479116645
ADVB:  (18696,) 

***** Episode 45723, Mean R = -10.1  Std R = 5.0  Min R = -21.3
PolicyLoss: 1.8
Policy_Entropy: 0.204
Policy_KL: 0.0067
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 1.67e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 4.57e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0019   0.0072   9.0125   2.2441   3.0464
ADVA:  (21655,) (35404,) 0.6116540503897865
ADV1:  0.0 -0.00118773785410912 0.009027085951094933 0.04813720692625137 -0.06930671318094639
ADVB:  (19755,) (35404,) 0.5579877979889278
ADV2:  0.09558834129674426 0.28989732769256793 0.411643974614178 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5684   0.2612   1.0291 101.1911  36.8110  32.5770
***** Episode 45754, Mean R = -10.9  Std R = 5.4  Min R = -28.3
PolicyLoss: 1.66
Policy_Entropy: 0.205
Policy_KL: 0.00526
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 1.67e+07
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.961
VF_0_Loss : 9.65e-05


ValFun  Gradients: u/sd/Max/C M

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8989   0.4597   1.7783 101.1911  36.8110  32.5770
***** Episode 45971, Mean R = -11.3  Std R = 6.8  Min R = -28.6
PolicyLoss: 1.73
Policy_Entropy: 0.204
Policy_KL: 0.00611
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 1.68e+07
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.000104


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0018   0.0065   9.0125   2.2441   3.0464
ADVA:  (20897,) (35276,) 0.5923857580224515
ADV1:  0.0019651424013658516 0.0011253401712819737 0.008461520668552968 0.04806198852750143 -0.063213192440492
ADVB:  (23624,) (35276,) 0.6696904410930945
ADV2:  0.3044051906418149 0.44835309947305657 0.5052860136134745 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3862   0.1786   0.7255 101.1911  36.8110  32.5770
***** Episode 46002, Mean R = -9.0  Std R = 5.4  Min R = -29.2
PolicyLoss: 2.15
Policy_Entropy: 0.205
Policy_KL: 0.00521
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps:

w        |    0.00   -0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.02   -0.08 |    0.63    1.85 |   -1.50   -3.14 |    1.47    3.14
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.04    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.26 |    0.00 |    1.60
seeker_angles |    0.00   -0.00 |    0.07    0.08 |   -0.99   -0.96 |    0.93    0.94
cs_angles |  0.0010 -0.0013 |  0.0736  0.0797 | -0.9879 -0.9628 |  0.9252  0.9389
optical_flow | -0.0001 -0.0001 |  0.0211  0.0206 | -1.0439 -1.1391 |  1.1236  0.8767
v_err    | -0.0113 |  0.0597 | -0.4532 |  0.1111
landing_rewards |    9.26 |    2.62 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -15.88 |    4.58 |  -34.23 |   -8.55
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.9644   2.6456  10.6229 101.1911  36.8110  32.5770
Update Cnt = 1500    ET =   1463.1   Stats:  Mean, Std, Min, Max
r_f      |   15.36  -13.55    9.29 |  185.69  175.66  195.46 | -394.34 -396.59 -386.27 |  393.93  384.77  377.81
v_f      |   -0.00    0.00   -0.00 |    0.05    0.05    0.05 |   -0.10   -0.13   -0.10 |    0.10    0.10    0.09
r_i      |   71.47  -32.99   37.59 |  707.70  672.66  725.03 |-1323.46-1309.66-1322.98 | 1321.88 1347.57 1264.74
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.10    0.09
norm_rf  |    0.16 |    0.06 |    0.01 |    0.35
norm_vf  |    0.08 |    0.01 |    0.03 |    0.13
gs_f     |    1.10 |    1.77 |    0.00 |   20.35
thrust   |    0.00   -0.01    0.00 |    0.67    0.67    0.68 |   -3.42   -3.45   -3.46 |    3.46    2.99    3.36
norm_thrust |    0.90 |    0.75 |    0.00 |    3.46
fuel     |    1.55 |    0.20 |    1.16 |    2.33
rewards  |  -11.01 

ADVA:  (20491,) (35271,) 0.5809588613875422
ADV1:  0.0015653798746755295 0.001219455956850815 0.008254742522859813 0.08864894448666172 -0.09351133375906023
ADVB:  (22196,) (35271,) 0.6292988574182756
ADV2:  0.23560723206912584 0.43672830649991456 0.5430792432164931 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.7418   1.4914   4.8247 101.1911  36.8110  32.5770
***** Episode 46808, Mean R = -10.2  Std R = 5.7  Min R = -25.1
PolicyLoss: 2.23
Policy_Entropy: 0.204
Policy_KL: 0.00579
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 1.71e+07
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.000253


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0015   0.0058   9.0125   2.2441   3.0464
ADVA:  (21776,) (35227,) 0.6181622051267494
ADV1:  0.00022757865807858553 -0.0008114991450477994 0.009191661358346292 0.07459244341250476 -0.09351133375906023
ADVB:  (19275,) (35227,) 0.5471655264427854
ADV2:  0.06995129593743833 0.2733477431254123 0.397571545326526

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0009   0.0037   9.0125   2.2441   3.0464
ADVA:  (21431,) (35279,) 0.6074718671164149
ADV1:  0.0 -0.0011401702823833773 0.008998147471813605 0.06362982971692172 -0.09228286938280267
ADVB:  (18911,) (35279,) 0.5360412710110831
ADV2:  0.059037258080681304 0.28232471780352414 0.4074449187155028 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7009   0.2202   1.2143 101.1911  36.8110  32.5770
***** Episode 47056, Mean R = -11.0  Std R = 5.5  Min R = -27.3
PolicyLoss: 1.68
Policy_Entropy: 0.205
Policy_KL: 0.00548
Policy_SD: 0.545
Steps: 1.2e+04
TotalSteps: 1.72e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000195


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0011   0.0046   9.0125   2.2441   3.0464
ADVA:  (22244,) (35507,) 0.6264680203903455
ADV1:  0.000695938705793887 -0.0005299697464302799 0.009031464379184362 0.06362982971692172 -0.09228286938280267
ADVB:  (20460,) (3550

***** Episode 47273, Mean R = -11.9  Std R = 5.2  Min R = -25.3
PolicyLoss: 1.87
Policy_Entropy: 0.204
Policy_KL: 0.00647
Policy_SD: 0.554
Steps: 1.16e+04
TotalSteps: 1.73e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 8.92e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0016   0.0060   9.0125   2.2441   3.0464
ADVA:  (21893,) (34928,) 0.6268037104901512
ADV1:  0.0008225948778556175 -0.0003012040703433276 0.008833150521551284 0.04400179869720483 -0.0812484177397712
ADVB:  (21386,) (34928,) 0.6122881355932204
ADV2:  0.16533776437951528 0.31785910981292237 0.4143008693450278 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3254   0.1128   0.5542 101.1911  36.8110  32.5770
***** Episode 47304, Mean R = -12.0  Std R = 6.0  Min R = -32.5
PolicyLoss: 1.67
Policy_Entropy: 0.205
Policy_KL: 0.00642
Policy_SD: 0.553
Steps: 1.16e+04
TotalSteps: 1.73e+07
VF_0_ExplainedVarNew: 0.96
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 9.6e-05


ValFun  Grad

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6730   0.3084   1.2989 101.1911  36.8110  32.5770
***** Episode 47521, Mean R = -9.2  Std R = 3.9  Min R = -19.4
PolicyLoss: 1.67
Policy_Entropy: 0.205
Policy_KL: 0.00617
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 1.73e+07
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.000169


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0010   0.0036   9.0125   2.2441   3.0464
ADVA:  (21462,) (35028,) 0.6127098321342925
ADV1:  0.0007370335678404775 5.8207968371838046e-05 0.0071654399975522 0.04682376891715434 -0.05284838577709944
ADVB:  (20086,) (35028,) 0.5734269727075483
ADV2:  0.12299566082831585 0.3210812201938074 0.43220237378701487 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7097   0.3072   1.4243 101.1911  36.8110  32.5770
***** Episode 47552, Mean R = -7.5  Std R = 4.0  Min R = -25.9
PolicyLoss: 1.78
Policy_Entropy: 0.206
Policy_KL: 0.00746
Policy_SD: 0.53
Steps: 1.17e+04
TotalSteps

w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.03   -0.11 |    0.66    1.92 |   -1.52   -3.14 |    1.41    3.11
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.03    0.02    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.26 |    0.00 |    1.42
seeker_angles |    0.00   -0.00 |    0.07    0.08 |   -0.96   -0.99 |    0.99    0.96
cs_angles |  0.0005 -0.0008 |  0.0740  0.0783 | -0.9595 -0.9940 |  0.9859  0.9646
optical_flow | -0.0000  0.0000 |  0.0203  0.0220 | -1.0347 -1.0696 |  1.0060  1.0451
v_err    | -0.0112 |  0.0595 | -0.4529 |  0.1216
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -15.47 |    4.52 |  -44.27 |   -8.72
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  10.6013   5.7761  23.8037 101.1911  36.8110  32.5770
Update Cnt = 1550    ET =   1386.7   Stats:  Mean, Std, Min, Max
r_f      |   -9.89   -9.09    8.85 |  193.68  163.94  195.70 | -387.15 -377.02 -396.86 |  389.59  377.52  393.65
v_f      |    0.01    0.01   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.11    0.11    0.11
r_i      |  -56.00 -106.29   18.63 |  678.36  650.32  765.66 |-1321.84-1332.63-1359.13 | 1264.38 1294.57 1308.48
v_i      |    0.00    0.01   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.10    0.10
norm_rf  |    0.16 |    0.07 |    0.02 |    0.38
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs_f     |    1.42 |    2.86 |    0.01 |   38.63
thrust   |   -0.01    0.00    0.01 |    0.66    0.68    0.68 |   -3.45   -3.45   -3.45 |    3.46    3.38    3.38
norm_thrust |    0.90 |    0.74 |    0.00 |    3.46
fuel     |    1.53 |    0.19 |    1.04 |    2.33
rewards  |  -10.39 

ADVA:  (20860,) (35008,) 0.595863802559415
ADV1:  0.0 -0.0007722097256571713 0.00942001515725165 0.056295457461520704 -0.11675227082593426
ADVB:  (19513,) (35008,) 0.5573868829981719
ADV2:  0.0800888856231993 0.3027297619044736 0.4349363478094451 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8830   0.2767   1.3405 101.1911  36.8110  32.5770
***** Episode 48358, Mean R = -9.6  Std R = 4.7  Min R = -22.1
PolicyLoss: 1.72
Policy_Entropy: 0.207
Policy_KL: 0.00663
Policy_SD: 0.543
Steps: 1.16e+04
TotalSteps: 1.77e+07
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.954
VF_0_Loss : 0.000841


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0038   0.0022   0.0082   9.0125   2.2441   3.0464
ADVA:  (21198,) (34859,) 0.6081069451217763
ADV1:  0.0016592326841414259 0.0010405067166692824 0.008371286504125457 0.056295457461520704 -0.11675227082593426
ADVB:  (22419,) (34859,) 0.6431337674632089
ADV2:  0.2325410799682705 0.40290062564541357 0.48760639008643797 3.0 0.0
Policy  G

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0010   9.0125   2.2441   3.0464
ADVA:  (20202,) (35024,) 0.5768044769301051
ADV1:  0.0010396163018285987 0.00043115476081304906 0.008092058286881147 0.060813603573011465 -0.052453897866878874
ADVB:  (21353,) (35024,) 0.6096676564641389
ADV2:  0.19469873711332636 0.39120781276579936 0.48983669861610096 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5768   0.2438   1.1198 101.1911  36.8110  32.5770
***** Episode 48606, Mean R = -10.8  Std R = 5.1  Min R = -23.6
PolicyLoss: 2.03
Policy_Entropy: 0.208
Policy_KL: 0.00568
Policy_SD: 0.537
Steps: 1.15e+04
TotalSteps: 1.78e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.00178


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0006   9.0125   2.2441   3.0464
ADVA:  (19694,) (34954,) 0.5634262173141844
ADV1:  0.00044889394487086093 -0.0002283591811074952 0.007932159043479024 0.04026897014917452 -0.053867818117141725

***** Episode 48823, Mean R = -13.1  Std R = 7.8  Min R = -30.9
PolicyLoss: 1.71
Policy_Entropy: 0.208
Policy_KL: 0.00557
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 1.78e+07
VF_0_ExplainedVarNew: 0.96
VF_0_ExplainedVarOld: 0.956
VF_0_Loss : 0.000169


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0019   0.0073   9.0125   2.2441   3.0464
ADVA:  (21200,) (35117,) 0.6036962155081584
ADV1:  0.00022336359344050233 -0.0008386654780636056 0.00966707379022199 0.06095677872944 -0.07592009445040054
ADVB:  (19379,) (35117,) 0.551840988694934
ADV2:  0.09926763218054434 0.3235547063789936 0.44947181968410077 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7914   0.2833   1.3844 101.1911  36.8110  32.5770
***** Episode 48854, Mean R = -11.5  Std R = 6.0  Min R = -27.1
PolicyLoss: 1.86
Policy_Entropy: 0.208
Policy_KL: 0.00797
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 1.79e+07
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 9.44e-05


ValFun  Gradie

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7663   0.2604   1.3228 101.1911  36.8110  32.5770
***** Episode 49071, Mean R = -11.6  Std R = 5.8  Min R = -35.0
PolicyLoss: 1.88
Policy_Entropy: 0.207
Policy_KL: 0.00505
Policy_SD: 0.551
Steps: 1.17e+04
TotalSteps: 1.79e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.000137


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0010   0.0036   9.0125   2.2441   3.0464
ADVA:  (20683,) (34943,) 0.5919068196777609
ADV1:  0.0017376402037322337 0.0009150103371184641 0.008398154404298839 0.045088235108678654 -0.05950267649915986
ADVB:  (22362,) (34943,) 0.6399565005866698
ADV2:  0.2733640394512585 0.43087504245067165 0.5213911299350794 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8640   0.3072   1.3992 101.1911  36.8110  32.5770
***** Episode 49102, Mean R = -9.7  Std R = 4.3  Min R = -20.5
PolicyLoss: 2.14
Policy_Entropy: 0.208
Policy_KL: 0.00512
Policy_SD: 0.548
Steps: 1.16e+04
TotalSt

w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.10    0.05 |    0.63    1.87 |   -1.42   -3.14 |    1.37    3.13
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.26 |    0.00 |    1.31
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -1.00 |    1.00    1.00
cs_angles |  0.0013  0.0004 |  0.0768  0.0813 | -0.9896 -0.9972 |  0.9952  0.9998
optical_flow |  0.0000  0.0001 |  0.0216  0.0217 | -1.0205 -0.9253 |  1.3160  1.2849
v_err    | -0.0113 |  0.0598 | -0.4536 |  0.1103
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.01
tracking_rewards |  -15.99 |    4.42 |  -39.64 |   -8.60
steps    |     376 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0767   0.4595   2.1677 101.1911  36.8110  32.5770
Update Cnt = 1600    ET =   1378.8   Stats:  Mean, Std, Min, Max
r_f      |    1.90    5.50    7.08 |  180.82  166.55  210.16 | -396.55 -365.89 -392.41 |  378.57  380.27  387.10
v_f      |    0.00    0.00   -0.00 |    0.05    0.05    0.05 |   -0.10   -0.10   -0.10 |    0.11    0.12    0.11
r_i      |    5.58   28.54   33.96 |  684.97  659.65  761.90 |-1350.13-1325.26-1336.26 | 1249.79 1278.37 1312.70
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.10    0.10
norm_rf  |    0.16 |    0.07 |    0.02 |    0.38
norm_vf  |    0.08 |    0.01 |    0.05 |    0.13
gs_f     |    1.32 |    2.15 |    0.00 |   18.00
thrust   |   -0.00    0.00    0.00 |    0.66    0.68    0.66 |   -3.39   -3.45   -3.45 |    3.29    3.38    3.44
norm_thrust |    0.89 |    0.74 |    0.00 |    3.46
fuel     |    1.50 |    0.18 |    1.12 |    2.20
rewards  |  -10.53 

ADVA:  (22144,) (35327,) 0.6268293373340504
ADV1:  0.0006726908789512192 -8.653527412376328e-05 0.008008093433735158 0.0508754448561628 -0.07057972825944939
ADVB:  (19469,) (35327,) 0.5511082175106858
ADV2:  0.07907653021453856 0.29371824281572956 0.42581670483310685 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2608   0.0996   0.5510 101.1911  36.8110  32.5770
***** Episode 49908, Mean R = -9.0  Std R = 3.7  Min R = -21.5
PolicyLoss: 1.68
Policy_Entropy: 0.21
Policy_KL: 0.008
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 1.82e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 7.73e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0038   9.0125   2.2441   3.0464
ADVA:  (20728,) (35132,) 0.5900034156894
ADV1:  0.00041739238612584044 -0.0003658149644384144 0.008254084553202805 0.043223629962614585 -0.07057972825944939
ADVB:  (19744,) (35132,) 0.5619947626095867
ADV2:  0.1031726284586265 0.3058274508761513 0.4274796513294041 3.

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0014   0.0049   9.0125   2.2441   3.0464
ADVA:  (23325,) (35028,) 0.6658958547447756
ADV1:  0.0 -0.001140590707196879 0.008550798007022778 0.0577781978121194 -0.06388352814735171
ADVB:  (15425,) (35028,) 0.4403619961173918
ADV2:  0.0 0.21153983841082294 0.38261140797767784 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5001   0.2351   1.1752 101.1911  36.8110  32.5770
***** Episode 50156, Mean R = -11.0  Std R = 5.3  Min R = -20.5
PolicyLoss: 1.51
Policy_Entropy: 0.209
Policy_KL: 0.00676
Policy_SD: 0.549
Steps: 1.17e+04
TotalSteps: 1.83e+07
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.955
VF_0_Loss : 0.00233


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0022   9.0125   2.2441   3.0464
ADVA:  (23840,) (35061,) 0.6799577878554519
ADV1:  0.0005998265571090303 -0.0007503567732560755 0.009617290557971404 0.0577781978121194 -0.06388352814735171
ADVB:  (18575,) (35061,) 0.529790935797

***** Episode 50373, Mean R = -11.0  Std R = 5.3  Min R = -27.4
PolicyLoss: 1.68
Policy_Entropy: 0.21
Policy_KL: 0.00735
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 1.84e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.961
VF_0_Loss : 0.000116


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0009   9.0125   2.2441   3.0464
ADVA:  (22006,) (35436,) 0.6210068856530082
ADV1:  0.001775006135447713 0.0003968556257953973 0.009934072201804676 0.05480625166034725 -0.07243486078301413
ADVB:  (22984,) (35436,) 0.6486059374647252
ADV2:  0.25014627062872263 0.3909331296912538 0.4609953543480598 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9624   0.4564   2.1523 101.1911  36.8110  32.5770
***** Episode 50404, Mean R = -12.8  Std R = 6.0  Min R = -24.0
PolicyLoss: 1.92
Policy_Entropy: 0.209
Policy_KL: 0.00608
Policy_SD: 0.533
Steps: 1.18e+04
TotalSteps: 1.84e+07
VF_0_ExplainedVarNew: 0.954
VF_0_ExplainedVarOld: 0.95
VF_0_Loss : 0.000295


ValFun  Gradi

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7641   0.3022   1.3691 101.1911  36.8110  32.5770
***** Episode 50621, Mean R = -10.3  Std R = 4.6  Min R = -18.8
PolicyLoss: 1.76
Policy_Entropy: 0.209
Policy_KL: 0.00937
Policy_SD: 0.534
Steps: 1.2e+04
TotalSteps: 1.85e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.000259


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0011   0.0047   9.0125   2.2441   3.0464
ADVA:  (21943,) (35369,) 0.6204020469903022
ADV1:  0.001088698105987945 7.334664679812412e-06 0.008759688692079546 0.07460272951816016 -0.08389508795755013
ADVB:  (21064,) (35369,) 0.5955497752268936
ADV2:  0.15623422368659762 0.32859437632209154 0.44239634465041855 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4552   0.1664   0.8542 101.1911  36.8110  32.5770
***** Episode 50652, Mean R = -11.1  Std R = 5.7  Min R = -27.1
PolicyLoss: 1.74
Policy_Entropy: 0.21
Policy_KL: 0.00555
Policy_SD: 0.537
Steps: 1.17e+04
TotalSte

seeker_angles |    0.00    0.00 |    0.07    0.08 |   -1.00   -0.99 |    0.97    0.99
cs_angles |  0.0036  0.0007 |  0.0749  0.0774 | -0.9966 -0.9923 |  0.9728  0.9921
optical_flow |  0.0001 -0.0001 |  0.0203  0.0207 | -1.0195 -1.0156 |  1.1151  1.0374
v_err    | -0.0111 |  0.0596 | -0.4625 |  0.1177
landing_rewards |    9.29 |    2.57 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.03
tracking_rewards |  -15.48 |    4.36 |  -41.68 |   -8.22
steps    |     380 |      20 |     330 |     419
***** Episode 50900, Mean R = -10.1  Std R = 4.2  Min R = -19.4
PolicyLoss: 1.69
Policy_Entropy: 0.21
Policy_KL: 0.00685
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps: 1.86e+07
VF_0_ExplainedVarNew: 0.962
VF_0_ExplainedVarOld: 0.959
VF_0_Loss : 0.000593


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0019   9.0125   2.2441   3.0464
ADVA:  (22315,) (35059,) 0.6364984740009698
ADV1:  0.001780125843197089 0.0002146689054594934 0.009359282902234952 0.0473320

w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02   -0.12 |    0.66    1.77 |   -1.50   -3.10 |    1.46    3.12
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.03 |    0.03    0.03    0.05
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.59
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.95   -1.00 |    0.94    1.00
cs_angles |  0.0026  0.0024 |  0.0763  0.0753 | -0.9536 -0.9964 |  0.9367  0.9963
optical_flow |  0.0001 -0.0000 |  0.0200  0.0209 | -1.0619 -1.1951 |  1.1172  1.1688
v_err    | -0.0111 |  0.0599 | -0.4536 |  0.1013
landing_rewards |    9.29 |    2.57 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -15.32 |    4.35 |  -34.69 |   -8.93
steps    |     376 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1921   0.0699   0.3459 101.1911  36.8110  32.5770
Update Cnt = 1660    ET =   1303.0   Stats:  Mean, Std, Min, Max
r_f      |   11.18   -2.41   -3.11 |  185.23  169.33  202.72 | -395.50 -376.09 -388.86 |  394.37  357.55  389.08
v_f      |   -0.00    0.00    0.00 |    0.04    0.05    0.05 |   -0.10   -0.10   -0.12 |    0.10    0.10    0.10
r_i      |   62.48  -13.83  -14.42 |  685.41  653.24  762.44 |-1384.26-1323.60-1255.38 | 1332.53 1318.57 1232.16
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.10    0.09    0.10
norm_rf  |    0.15 |    0.07 |    0.02 |    0.39
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.19 |    1.56 |    0.01 |   14.33
thrust   |   -0.00   -0.00   -0.01 |    0.66    0.68    0.66 |   -3.43   -3.31   -3.46 |    3.37    3.31    3.45
norm_thrust |    0.89 |    0.74 |    0.00 |    3.46
fuel     |    1.50 |    0.18 |    1.12 |    2.11
rewards  |  -10.07 

ADVA:  (20432,) (35167,) 0.5809992322347656
ADV1:  0.0 -0.0007538388585035444 0.008354026482731921 0.05417547805859679 -0.083406775834932
ADVB:  (19665,) (35167,) 0.5591890124264225
ADV2:  0.09384761625902109 0.31549355541507845 0.4402370083051102 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1922   0.5560   2.6358 101.1911  36.8110  32.5770
***** Episode 51768, Mean R = -9.6  Std R = 4.4  Min R = -21.6
PolicyLoss: 1.77
Policy_Entropy: 0.21
Policy_KL: 0.00755
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 1.9e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.00012


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0025   9.0125   2.2441   3.0464
ADVA:  (19193,) (35238,) 0.5446676882910494
ADV1:  0.0013085816474707537 0.0006145314282882344 0.007846577546396057 0.05417547805859679 -0.083406775834932
ADVB:  (23518,) (35238,) 0.6674045064986662
ADV2:  0.3111641055282739 0.45152503625568213 0.5096770398951046 3.0 0.0
Policy  Gradien

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0043   0.0025   0.0091   9.0125   2.2441   3.0464
ADVA:  (20936,) (35424,) 0.5910117434507678
ADV1:  0.0014949795558196043 0.0008244889823126509 0.008414854850971812 0.04716276254038443 -0.07804859730479569
ADVB:  (22015,) (35424,) 0.6214713188798555
ADV2:  0.24462480077012055 0.43573043104152037 0.5303274313964106 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9776   0.4478   1.9233 101.1911  36.8110  32.5770
***** Episode 52016, Mean R = -10.1  Std R = 5.2  Min R = -26.4
PolicyLoss: 2.2
Policy_Entropy: 0.211
Policy_KL: 0.00929
Policy_SD: 0.538
Steps: 1.17e+04
TotalSteps: 1.9e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 2.6e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0039   0.0026   0.0108   9.0125   2.2441   3.0464
ADVA:  (19712,) (35285,) 0.5586509848377498
ADV1:  0.0014413768868002906 0.0009058000437264054 0.008075184837175764 0.0683309988502428 -0.05447791053304757
ADVB:  (2

***** Episode 52233, Mean R = -9.5  Std R = 3.8  Min R = -19.3
PolicyLoss: 1.69
Policy_Entropy: 0.21
Policy_KL: 0.00634
Policy_SD: 0.538
Steps: 1.17e+04
TotalSteps: 1.91e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.00016


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0051   9.0125   2.2441   3.0464
ADVA:  (20430,) (35375,) 0.5775265017667844
ADV1:  0.0003404434774492808 -0.00018093235007604253 0.007987170290437887 0.053603031441496085 -0.08687136176792609
ADVB:  (19910,) (35375,) 0.5628268551236749
ADV2:  0.12184371959396308 0.3689223420406934 0.49204543099609305 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0134   0.4677   1.9480 101.1911  36.8110  32.5770
***** Episode 52264, Mean R = -10.4  Std R = 3.9  Min R = -18.6
PolicyLoss: 2.06
Policy_Entropy: 0.211
Policy_KL: 0.00677
Policy_SD: 0.536
Steps: 1.19e+04
TotalSteps: 1.91e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 5.1e-05


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5345   0.2103   1.0161 101.1911  36.8110  32.5770
***** Episode 52481, Mean R = -9.5  Std R = 4.3  Min R = -25.3
PolicyLoss: 2.04
Policy_Entropy: 0.212
Policy_KL: 0.0053
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 1.92e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.000104


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0031   9.0125   2.2441   3.0464
ADVA:  (19442,) (35247,) 0.5515930433795784
ADV1:  0.0 -0.00022159405801572418 0.006107041205172707 0.09042904375548022 -0.07884819351031694
ADVB:  (18116,) (35247,) 0.5139728203818764
ADV2:  0.03225268735512736 0.36650133437718313 0.5387020804985412 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5109   0.6594   3.0430 101.1911  36.8110  32.5770
***** Episode 52512, Mean R = -10.9  Std R = 4.9  Min R = -23.1
PolicyLoss: 2.21
Policy_Entropy: 0.212
Policy_KL: 0.00796
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 1.92e+07
VF_

seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.97   -0.99 |    0.97    1.00
cs_angles |  0.0011  0.0025 |  0.0717  0.0774 | -0.9726 -0.9933 |  0.9734  0.9995
optical_flow | -0.0001 -0.0000 |  0.0186  0.0215 | -1.0329 -1.1436 |  1.0575  1.1437
v_err    | -0.0110 |  0.0597 | -0.4531 |  0.0969
landing_rewards |    9.68 |    1.77 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -15.29 |    3.93 |  -30.51 |   -8.02
steps    |     379 |      21 |     331 |     421
***** Episode 52760, Mean R = -10.6  Std R = 4.7  Min R = -25.6
PolicyLoss: 2.8
Policy_Entropy: 0.211
Policy_KL: 0.0091
Policy_SD: 0.542
Steps: 1.15e+04
TotalSteps: 1.93e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000235


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0001   0.0006   9.0125   2.2441   3.0464
ADVA:  (17889,) (34723,) 0.5151916597068226
ADV1:  -0.00010022162920393131 -0.0008839547268245149 0.0058791307853185626 0.055

attitude |   -0.12   -0.01   -0.02 |    1.16    0.65    1.81 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.01   -0.03 |    0.65    1.82 |   -1.45   -3.14 |    1.49    3.14
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.04   -0.02 |    0.03    0.02    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.55
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.92 |    0.99    0.99
cs_angles |  0.0013  0.0029 |  0.0755  0.0770 | -0.9755 -0.9206 |  0.9944  0.9926
optical_flow | -0.0000  0.0001 |  0.0199  0.0212 | -1.2029 -0.9848 |  1.1085  1.1708
v_err    | -0.0109 |  0.0594 | -0.4517 |  0.1026
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4478   0.1416   0.7668 101.1911  36.8110  32.5770
Update Cnt = 1720    ET =   1190.3   Stats:  Mean, Std, Min, Max
r_f      |    8.27    7.62   -2.58 |  189.16  171.00  200.14 | -391.92 -396.09 -396.96 |  395.66  387.31  391.76
v_f      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.11   -0.10 |    0.11    0.10    0.09
r_i      |   27.04   18.10  -14.34 |  679.22  659.82  771.48 |-1335.80-1262.61-1282.30 | 1289.48 1314.10 1305.32
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.15 |    0.07 |    0.02 |    0.41
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.22 |    1.57 |    0.00 |   13.06
thrust   |    0.01   -0.00    0.01 |    0.66    0.68    0.68 |   -3.42   -3.45   -3.35 |    3.45    3.28    3.34
norm_thrust |    0.90 |    0.74 |    0.00 |    3.46
fuel     |    1.52 |    0.18 |    1.15 |    2.12
rewards  |  -10.02 

ADVA:  (19910,) (35311,) 0.5638469598708618
ADV1:  0.0007946680676157031 0.00017384404212176003 0.0073715999753710005 0.04363087262991694 -0.0713628465590335
ADVB:  (21610,) (35311,) 0.6119905978307043
ADV2:  0.19591255509077385 0.37031774926818756 0.47213964556528665 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6569   0.2811   1.3769 101.1911  36.8110  32.5770
***** Episode 53628, Mean R = -10.7  Std R = 4.9  Min R = -27.7
PolicyLoss: 1.89
Policy_Entropy: 0.213
Policy_KL: 0.00506
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 1.97e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 9.41e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0006   0.0020   9.0125   2.2441   3.0464
ADVA:  (19777,) (35321,) 0.5599218595170012
ADV1:  0.00089872676032273 0.0005782178974486706 0.006817892701402445 0.05254801966392819 -0.09122103737402304
ADVB:  (22051,) (35321,) 0.6243028226833894
ADV2:  0.2239362421702808 0.4121652359711119 0.5172119613172519

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0021   9.0125   2.2441   3.0464
ADVA:  (18652,) (34869,) 0.5349164013880524
ADV1:  0.00023617957315063502 0.0002046855813138129 0.006352948275989594 0.05423310266494685 -0.060955547097164144
ADVB:  (19450,) (34869,) 0.5578020591356219
ADV2:  0.11754822127230326 0.3782562304558854 0.5343459428456079 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5749   0.2538   1.1050 101.1911  36.8110  32.5770
***** Episode 53876, Mean R = -10.0  Std R = 4.2  Min R = -17.4
PolicyLoss: 2.11
Policy_Entropy: 0.212
Policy_KL: 0.00663
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 1.97e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000493


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0028   9.0125   2.2441   3.0464
ADVA:  (19750,) (35052,) 0.5634485906652973
ADV1:  0.0 -8.07627819529114e-05 0.005815794588659828 0.05423310266494685 -0.050500534693917354
ADVB:  (17677,) (350

***** Episode 54093, Mean R = -11.2  Std R = 6.0  Min R = -31.1
PolicyLoss: 1.69
Policy_Entropy: 0.213
Policy_KL: 0.00527
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 1.98e+07
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.00308


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0023   9.0125   2.2441   3.0464
ADVA:  (21111,) (35193,) 0.599863609240474
ADV1:  0.00048170480167436004 -0.00021013634213860364 0.008052427913301618 0.06336347839314965 -0.05272498136457811
ADVB:  (20253,) (35193,) 0.5754837609751939
ADV2:  0.11817389280621639 0.3213834935522974 0.4579237374109499 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7916   0.3220   1.6279 101.1911  36.8110  32.5770
***** Episode 54124, Mean R = -10.2  Std R = 4.3  Min R = -22.9
PolicyLoss: 1.74
Policy_Entropy: 0.213
Policy_KL: 0.00659
Policy_SD: 0.538
Steps: 1.17e+04
TotalSteps: 1.98e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.000657


ValFun  Gr

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5500   0.2050   1.0498 101.1911  36.8110  32.5770
***** Episode 54341, Mean R = -9.4  Std R = 3.3  Min R = -17.3
PolicyLoss: 1.86
Policy_Entropy: 0.214
Policy_KL: 0.00667
Policy_SD: 0.536
Steps: 1.19e+04
TotalSteps: 1.99e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000395


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0025   9.0125   2.2441   3.0464
ADVA:  (18056,) (35439,) 0.5094951889161659
ADV1:  0.0 -0.0003579521261559125 0.006280123094784552 0.040032753675067145 -0.09067551567487947
ADVB:  (19712,) (35439,) 0.556223369733909
ADV2:  0.10792296860848877 0.3591332964289044 0.4907464420544877 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3116   0.5506   2.5692 101.1911  36.8110  32.5770
***** Episode 54372, Mean R = -9.9  Std R = 4.2  Min R = -22.8
PolicyLoss: 2
Policy_Entropy: 0.214
Policy_KL: 0.00919
Policy_SD: 0.532
Steps: 1.17e+04
TotalSteps: 1.99e+07
VF_0_Exp

theta_cv |    0.21 |    0.25 |    0.00 |    1.45
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.97   -1.00 |    0.99    1.00
cs_angles |  0.0008  0.0026 |  0.0719  0.0795 | -0.9660 -0.9975 |  0.9941  0.9951
optical_flow |  0.0000  0.0000 |  0.0194  0.0224 | -1.1786 -0.9368 |  0.8294  1.2006
v_err    | -0.0111 |  0.0599 | -0.4552 |  0.1051
landing_rewards |    9.65 |    1.85 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -15.46 |    5.06 |  -72.17 |   -8.80
steps    |     379 |      20 |     337 |     417
***** Episode 54620, Mean R = -10.9  Std R = 5.6  Min R = -24.2
PolicyLoss: 1.69
Policy_Entropy: 0.214
Policy_KL: 0.00687
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 2e+07
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.000445


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0027   9.0125   2.2441   3.0464
ADVA:  (21152,) (35195,) 0.6009944594402614
ADV1:  0.0002494848643062944 -0.00

theta_cv |    0.21 |    0.25 |    0.00 |    1.34
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.99   -0.99 |    0.97    1.00
cs_angles |  0.0004  0.0010 |  0.0723  0.0778 | -0.9902 -0.9918 |  0.9720  0.9966
optical_flow |  0.0001  0.0000 |  0.0188  0.0229 | -0.9740 -1.1297 |  0.9537  1.0211
v_err    | -0.0108 |  0.0592 | -0.4526 |  0.1137
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -15.32 |    3.97 |  -29.38 |   -8.73
steps    |     379 |      20 |     339 |     419
***** Episode 54930, Mean R = -11.0  Std R = 6.2  Min R = -28.8
PolicyLoss: 1.81
Policy_Entropy: 0.215
Policy_KL: 0.00596
Policy_SD: 0.54
Steps: 1.18e+04
TotalSteps: 2.01e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.00021


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0005   0.0020   9.0125   2.2441   3.0464
ADVA:  (22148,) (35433,) 0.6250670279118336
ADV1:  0.001667716470330806 0.00

w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.04    0.30 |    0.68    1.83 |   -1.37   -3.14 |    1.43    3.13
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.02 |    0.04    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.42
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.96   -0.99 |    0.89    0.96
cs_angles |  0.0002  0.0022 |  0.0767  0.0773 | -0.9608 -0.9891 |  0.8888  0.9551
optical_flow |  0.0001 -0.0001 |  0.0189  0.0214 | -0.9119 -1.2015 |  0.9079  0.9217
v_err    | -0.0110 |  0.0599 | -0.4522 |  0.1104
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.08 |    0.02
tracking_rewards |  -15.51 |    4.12 |  -30.55 |   -8.54
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5255   0.2286   1.0245 101.1911  36.8110  32.5770
Update Cnt = 1790    ET =    803.5   Stats:  Mean, Std, Min, Max
r_f      |   -0.33  -12.49   16.24 |  186.22  175.04  198.47 | -396.42 -360.49 -373.85 |  394.08  373.59  393.49
v_f      |    0.00    0.00   -0.01 |    0.05    0.04    0.05 |   -0.11   -0.10   -0.11 |    0.09    0.09    0.11
r_i      |   -2.47  -50.79   67.47 |  692.69  640.82  763.39 |-1308.70-1322.46-1305.36 | 1355.59 1318.49 1305.92
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.09    0.10    0.10
norm_rf  |    0.16 |    0.07 |    0.03 |    0.46
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f     |    1.60 |    4.13 |    0.00 |   56.93
thrust   |   -0.00    0.00    0.00 |    0.67    0.68    0.67 |   -3.46   -3.33   -3.45 |    3.45    3.43    3.40
norm_thrust |    0.90 |    0.74 |    0.00 |    3.46
fuel     |    1.51 |    0.19 |    1.07 |    2.24
rewards  |  -10.22 

ADVA:  (21411,) (35321,) 0.6061832903938167
ADV1:  0.0008759944768041428 0.00011697356131442239 0.008047175230754516 0.0449389174553913 -0.08455255342284429
ADVB:  (21185,) (35321,) 0.5997848305540613
ADV2:  0.16108859264641517 0.35442622053177725 0.47060817954450673 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3683   0.1706   0.7501 101.1911  36.8110  32.5770
***** Episode 55798, Mean R = -10.2  Std R = 5.1  Min R = -22.9
PolicyLoss: 1.84
Policy_Entropy: 0.214
Policy_KL: 0.00623
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 2.05e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.000102


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0013   0.0051   9.0125   2.2441   3.0464
ADVA:  (21705,) (35368,) 0.6136903415516851
ADV1:  0.00030857116582321594 -0.0005470161019007998 0.008618630416407735 0.0449389174553913 -0.1017465745041024
ADVB:  (19549,) (35368,) 0.552731282515268
ADV2:  0.08400450561472866 0.31182161466789665 0.44705784317867

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0042   0.0026   0.0090   9.0125   2.2441   3.0464
ADVA:  (22914,) (34921,) 0.6561667764382463
ADV1:  0.0009593837484825531 -0.0003722069934510792 0.009602924061662814 0.04875099584859352 -0.05919044948056085
ADVB:  (21335,) (34921,) 0.6109504309727671
ADV2:  0.17820259299950097 0.3396649754632122 0.4236619381621128 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3946   0.1315   0.6031 101.1911  36.8110  32.5770
***** Episode 56046, Mean R = -10.4  Std R = 5.7  Min R = -30.8
PolicyLoss: 1.73
Policy_Entropy: 0.214
Policy_KL: 0.00636
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps: 2.06e+07
VF_0_ExplainedVarNew: 0.955
VF_0_ExplainedVarOld: 0.951
VF_0_Loss : 0.000117


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0041   0.0026   0.0100   9.0125   2.2441   3.0464
ADVA:  (22246,) (35013,) 0.6353640076542998
ADV1:  0.001658268090725952 0.0007017507527145746 0.009588297470661608 0.07696192977556038 -0.06697555356934032
ADVB:

***** Episode 56263, Mean R = -10.3  Std R = 5.5  Min R = -26.1
PolicyLoss: 2.13
Policy_Entropy: 0.215
Policy_KL: 0.007
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps: 2.07e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000101


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0029   9.0125   2.2441   3.0464
ADVA:  (20804,) (35318,) 0.5890480774675803
ADV1:  0.0 -0.0005260950273623913 0.007870613044695171 0.06066390796946025 -0.1023137912024028
ADVB:  (17779,) (35318,) 0.5033977008890651
ADV2:  0.006200005708932301 0.3098855747003303 0.488372763081823 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0301   0.6158   2.2274 101.1911  36.8110  32.5770
***** Episode 56294, Mean R = -9.5  Std R = 3.8  Min R = -20.7
PolicyLoss: 1.91
Policy_Entropy: 0.215
Policy_KL: 0.00735
Policy_SD: 0.53
Steps: 1.18e+04
TotalSteps: 2.07e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.000178


ValFun  Gradients: u/sd/Max/C Max

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7511   0.2843   1.4527 101.1911  36.8110  32.5770
***** Episode 56511, Mean R = -9.4  Std R = 4.2  Min R = -23.6
PolicyLoss: 2.07
Policy_Entropy: 0.215
Policy_KL: 0.00556
Policy_SD: 0.543
Steps: 1.15e+04
TotalSteps: 2.07e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 6.6e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0015   0.0052   9.0125   2.2441   3.0464
ADVA:  (17843,) (34633,) 0.5152022637368984
ADV1:  0.001041931431067608 0.0006111534104151247 0.006656001167654019 0.06044845043404956 -0.10086811956333613
ADVB:  (23197,) (34633,) 0.6697947044726128
ADV2:  0.31979649623192963 0.4720270615070038 0.5521519782363425 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8691   0.3349   1.4570 101.1911  36.8110  32.5770
***** Episode 56542, Mean R = -9.1  Std R = 5.5  Min R = -25.2
PolicyLoss: 2.18
Policy_Entropy: 0.215
Policy_KL: 0.00846
Policy_SD: 0.537
Steps: 1.16e+04
TotalSteps:

w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.06    0.04 |    0.69    1.87 |   -1.45   -3.11 |    1.47    3.14
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.01 |    0.03    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.25 |    0.00 |    1.57
seeker_angles |    0.00   -0.00 |    0.07    0.08 |   -0.98   -0.98 |    0.97    0.94
cs_angles |  0.0000 -0.0008 |  0.0730  0.0782 | -0.9779 -0.9830 |  0.9691  0.9429
optical_flow |  0.0000  0.0000 |  0.0195  0.0197 | -1.0025 -0.9084 |  0.9727  0.9468
v_err    | -0.0107 |  0.0593 | -0.4526 |  0.1023
landing_rewards |    9.55 |    2.08 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.03
tracking_rewards |  -15.28 |    4.41 |  -34.94 |   -8.42
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2699   0.5145   2.3664 101.1911  36.8110  32.5770
Update Cnt = 1840    ET =    813.3   Stats:  Mean, Std, Min, Max
r_f      |    4.55   -5.04   -8.73 |  190.51  168.84  195.04 | -383.98 -703.15 -395.11 |  385.99  376.55  394.12
v_f      |   -0.00    0.01    0.00 |    0.05    0.05    0.05 |   -0.36   -0.09   -0.09 |    0.09    0.50    0.27
r_i      |   14.60  -28.78  -44.36 |  680.30  636.24  774.22 |-1366.80-1246.67-1311.04 | 1290.06 1307.06 1337.99
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.09    0.09
norm_rf  |    1.41 |   22.05 |    0.02 |  389.01
norm_vf  |    0.08 |    0.04 |    0.04 |    0.67
gs_f     |    1.34 |    2.11 |    0.01 |   23.65
thrust   |   -0.00    0.01    0.00 |    0.67    0.68    0.68 |   -3.27   -3.46   -3.46 |    3.46    3.32    3.44
norm_thrust |    0.91 |    0.74 |    0.00 |    3.46
fuel     |    1.51 |    0.20 |    1.11 |    2.52
rewards  |   -9.81 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3092   0.1209   0.5384 101.1911  36.8110  32.5770
***** Episode 57348, Mean R = -11.2  Std R = 5.6  Min R = -26.5
PolicyLoss: 1.88
Policy_Entropy: 0.215
Policy_KL: 0.00682
Policy_SD: 0.545
Steps: 1.18e+04
TotalSteps: 2.11e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.000683


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0013   9.0125   2.2441   3.0464
ADVA:  (20108,) (35251,) 0.5704235340841395
ADV1:  0.0005794657462566168 -2.221154976937103e-05 0.007474057148476198 0.05641677602482309 -0.067563524544239
ADVB:  (21135,) (35251,) 0.599557459362855
ADV2:  0.17229602464089938 0.3562055816167642 0.46452146883815143 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4915   0.2000   0.8517 101.1911  36.8110  32.5770
***** Episode 57379, Mean R = -9.0  Std R = 3.4  Min R = -20.0
PolicyLoss: 1.83
Policy_Entropy: 0.216
Policy_KL: 0.0058
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps

ADVA:  (18796,) (34926,) 0.5381664089789842
ADV1:  0.00011721105990031289 -0.0002871624401288958 0.006813874015417157 0.06607032178518896 -0.06268282118104956
ADVB:  (19642,) (34926,) 0.562389051136689
ADV2:  0.10633668991629816 0.33899249077310845 0.4804471431649937 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3787   0.1410   0.6590 101.1911  36.8110  32.5770
***** Episode 57596, Mean R = -10.8  Std R = 4.6  Min R = -23.6
PolicyLoss: 1.86
Policy_Entropy: 0.216
Policy_KL: 0.00635
Policy_SD: 0.542
Steps: 1.16e+04
TotalSteps: 2.12e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.00226


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0004   9.0125   2.2441   3.0464
ADVA:  (21030,) (35001,) 0.6008399760006857
ADV1:  0.00034355748608396255 -0.00032596249693981266 0.007558468292052754 0.06607032178518896 -0.05491051189099283
ADVB:  (19417,) (35001,) 0.5547555784120454
ADV2:  0.08728388748604404 0.3053275940087677 0.444330930784

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0025   9.0125   2.2441   3.0464
ADVA:  (18810,) (35222,) 0.5340412242348532
ADV1:  0.0003808576330219318 0.0002434864175940324 0.006823000827912167 0.07508491092534114 -0.0557515805027371
ADVB:  (20457,) (35222,) 0.5808017716200102
ADV2:  0.16203536962991583 0.41876874862896657 0.5551712564704738 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3997   0.1470   0.7352 101.1911  36.8110  32.5770
***** Episode 57844, Mean R = -9.7  Std R = 4.1  Min R = -19.7
PolicyLoss: 2.22
Policy_Entropy: 0.217
Policy_KL: 0.00689
Policy_SD: 0.544
Steps: 1.18e+04
TotalSteps: 2.12e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.000642


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0040   9.0125   2.2441   3.0464
ADVA:  (17850,) (35334,) 0.5051791475632535
ADV1:  0.0013655412948797106 0.0013375675013598371 0.006055822993424983 0.07508491092534114 -0.0557515805027371
ADVB:  

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0012   9.0125   2.2441   3.0464
ADVA:  (20798,) (35094,) 0.5926369179916795
ADV1:  0.0004875550662354292 0.00015120763201140414 0.00678428537779573 0.07981615148868526 -0.08329060169021835
ADVB:  (19453,) (35094,) 0.5543112782811876
ADV2:  0.08957689126596931 0.3161167558156365 0.4608304433234485 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6847   0.2286   1.0940 101.1911  36.8110  32.5770
***** Episode 58092, Mean R = -10.7  Std R = 5.3  Min R = -28.8
PolicyLoss: 1.75
Policy_Entropy: 0.217
Policy_KL: 0.00896
Policy_SD: 0.541
Steps: 1.18e+04
TotalSteps: 2.13e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.000331


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0006   0.0027   9.0125   2.2441   3.0464
ADVA:  (19676,) (35471,) 0.5547066617800457
ADV1:  0.0 -0.0002809050647250508 0.006612058760695199 0.07981615148868526 -0.08329060169021835
ADVB:  (19563,) (35471,

theta_cv |    0.22 |    0.27 |    0.00 |    1.51
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.96   -0.99 |    0.96    0.98
cs_angles |  0.0007  0.0017 |  0.0760  0.0814 | -0.9567 -0.9863 |  0.9599  0.9835
optical_flow |  0.0000  0.0001 |  0.0177  0.0223 | -1.2503 -1.2877 |  1.0384  1.3044
v_err    | -0.0112 |  0.0594 | -0.4537 |  0.1010
landing_rewards |    9.84 |    1.26 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -15.89 |    4.55 |  -33.44 |   -8.60
steps    |     379 |      20 |     336 |     420
***** Episode 58340, Mean R = -10.3  Std R = 4.5  Min R = -23.4
PolicyLoss: 2.06
Policy_Entropy: 0.218
Policy_KL: 0.00573
Policy_SD: 0.548
Steps: 1.18e+04
TotalSteps: 2.14e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.000321


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0011   0.0043   9.0125   2.2441   3.0464
ADVA:  (19547,) (35136,) 0.5563239981785064
ADV1:  1.4677597192798084e-05 

seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.98   -0.96 |    0.99    0.99
cs_angles |  0.0030  0.0013 |  0.0748  0.0774 | -0.9827 -0.9599 |  0.9913  0.9874
optical_flow | -0.0001 -0.0000 |  0.0207  0.0213 | -1.2465 -1.0614 |  1.1200  1.3928
v_err    | -0.0113 |  0.0597 | -0.4536 |  0.0935
landing_rewards |    9.87 |    1.13 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.07 |    0.01
tracking_rewards |  -15.61 |    4.52 |  -39.41 |   -8.74
steps    |     380 |      20 |     340 |     418
***** Episode 58650, Mean R = -9.8  Std R = 5.2  Min R = -26.0
PolicyLoss: 2.22
Policy_Entropy: 0.217
Policy_KL: 0.00762
Policy_SD: 0.545
Steps: 1.16e+04
TotalSteps: 2.16e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000238


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0031   9.0125   2.2441   3.0464
ADVA:  (19413,) (35206,) 0.5514116911890019
ADV1:  0.00026721745816719183 0.00019746455607575422 0.0058042419212364655 0.052

seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.98   -0.97 |    0.86    0.96
cs_angles |  0.0006  0.0021 |  0.0736  0.0787 | -0.9788 -0.9681 |  0.8639  0.9643
optical_flow |  0.0000 -0.0000 |  0.0179  0.0216 | -0.9646 -0.9685 |  0.8735  1.0487
v_err    | -0.0113 |  0.0596 | -0.4528 |  0.0993
landing_rewards |    9.84 |    1.26 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.07 |    0.01
tracking_rewards |  -15.33 |    4.73 |  -42.67 |   -8.33
steps    |     378 |      21 |     331 |     422
***** Episode 58960, Mean R = -10.7  Std R = 5.2  Min R = -23.3
PolicyLoss: 2.02
Policy_Entropy: 0.218
Policy_KL: 0.00867
Policy_SD: 0.553
Steps: 1.18e+04
TotalSteps: 2.17e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.00125


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0016   0.0053   9.0125   2.2441   3.0464
ADVA:  (21724,) (35453,) 0.6127549149578315
ADV1:  0.000850328220705558 0.0007442480524615529 0.0061155574568056625 0.048594

seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.96   -0.99 |    0.97    0.99
cs_angles |  0.0008  0.0005 |  0.0724  0.0778 | -0.9629 -0.9859 |  0.9659  0.9922
optical_flow |  0.0002 -0.0001 |  0.0191  0.0229 | -0.9289 -1.1022 |  0.8871  0.9865
v_err    | -0.0115 |  0.0599 | -0.4516 |  0.1076
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.07 |    0.02
tracking_rewards |  -15.36 |    4.22 |  -41.26 |   -7.58
steps    |     378 |      21 |     332 |     419
***** Episode 59270, Mean R = -10.3  Std R = 4.4  Min R = -21.1
PolicyLoss: 1.73
Policy_Entropy: 0.217
Policy_KL: 0.00731
Policy_SD: 0.553
Steps: 1.19e+04
TotalSteps: 2.18e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.000376


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0012   0.0042   9.0125   2.2441   3.0464
ADVA:  (19924,) (35111,) 0.5674574919540885
ADV1:  0.0005311250323009296 0.00037540737490560745 0.006848861080908464 0.0837

seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.99 |    1.00    1.00
cs_angles |  0.0016  0.0004 |  0.0784  0.0799 | -0.9812 -0.9899 |  0.9977  0.9991
optical_flow |  0.0000 -0.0000 |  0.0196  0.0205 | -1.1701 -0.9339 |  1.0245  1.0550
v_err    | -0.0114 |  0.0605 | -0.4594 |  0.1017
landing_rewards |    9.65 |    1.85 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -15.91 |    4.51 |  -36.79 |   -8.82
steps    |     377 |      21 |     334 |     420
***** Episode 59580, Mean R = -11.2  Std R = 4.7  Min R = -22.8
PolicyLoss: 2.16
Policy_Entropy: 0.219
Policy_KL: 0.0075
Policy_SD: 0.542
Steps: 1.16e+04
TotalSteps: 2.19e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.000433


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0019   9.0125   2.2441   3.0464
ADVA:  (20021,) (35173,) 0.5692150228868734
ADV1:  0.0007325445268644218 0.00019088679033379475 0.008236725039862457 0.052475

attitude |    0.06   -0.03    0.01 |    1.16    0.69    1.77 |   -3.14   -1.55   -3.14 |    3.14    1.55    3.14
w        |   -0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.03   -0.02 |    0.69    1.78 |   -1.46   -3.13 |    1.42    3.13
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.03 |    0.03    0.03    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.56
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.99   -1.00 |    0.99    0.99
cs_angles |  0.0002  0.0005 |  0.0714  0.0757 | -0.9860 -0.9956 |  0.9888  0.9911
optical_flow |  0.0001  0.0000 |  0.0190  0.0233 | -1.1178 -1.1335 |  0.9562  1.2765
v_err    | -0.0114 |  0.0602 | -0.4569 |  0.1004
landing_rewards |    9.74 |    1.59 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0370   0.4408   1.8259 101.1911  36.8110  32.5770
Update Cnt = 1940    ET =    796.2   Stats:  Mean, Std, Min, Max
r_f      |   -6.38   14.56   10.93 |  191.57  179.19  198.23 | -393.59 -379.08 -384.63 |  395.57  361.74  394.67
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.08   -0.07   -0.09 |    0.09    0.09    0.09
r_i      |  -37.39   32.46   52.59 |  682.14  671.51  761.03 |-1305.54-1370.06-1355.34 | 1345.92 1295.64 1343.16
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.15 |    0.07 |    0.01 |    0.71
norm_vf  |    0.07 |    0.01 |    0.04 |    0.12
gs_f     |    1.25 |    2.03 |    0.01 |   25.26
thrust   |    0.00    0.00   -0.00 |    0.67    0.68    0.68 |   -3.35   -3.40   -3.42 |    3.32    3.43    3.34
norm_thrust |    0.92 |    0.73 |    0.00 |    3.46
fuel     |    1.51 |    0.18 |    1.14 |    2.26
rewards  |   -9.63 

ADVA:  (19585,) (35357,) 0.5539214299855757
ADV1:  0.0 -0.0003168655777770114 0.005697492661493492 0.046759902039033796 -0.057641480020697966
ADVB:  (17732,) (35357,) 0.5015131374268179
ADV2:  0.002784891955409756 0.32528052209986874 0.5296618110800625 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5852   0.2160   1.2305 101.1911  36.8110  32.5770
***** Episode 60448, Mean R = -10.6  Std R = 4.6  Min R = -20.2
PolicyLoss: 1.98
Policy_Entropy: 0.218
Policy_KL: 0.00562
Policy_SD: 0.551
Steps: 1.17e+04
TotalSteps: 2.22e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.000215


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0008   9.0125   2.2441   3.0464
ADVA:  (17718,) (35283,) 0.502168182977638
ADV1:  0.0 -0.00011153342769796677 0.005014109273058831 0.046759902039033796 -0.053997024950224404
ADVB:  (18502,) (35283,) 0.5243885157157838
ADV2:  0.0545125319979101 0.40647230590295613 0.6143246263821776 3.0 0.0
Policy  Gradients: 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0021   9.0125   2.2441   3.0464
ADVA:  (19296,) (35367,) 0.5455933497328017
ADV1:  0.0 -0.0005070716908921219 0.007196081635465014 0.05227589841223168 -0.10103709245477954
ADVB:  (19619,) (35367,) 0.5547261571521475
ADV2:  0.09096224020781554 0.31433953920823204 0.4554830188514477 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.4990   1.1755   4.4060 101.1911  36.8110  32.5770
***** Episode 60696, Mean R = -10.4  Std R = 6.7  Min R = -39.5
PolicyLoss: 1.73
Policy_Entropy: 0.218
Policy_KL: 0.00742
Policy_SD: 0.549
Steps: 1.18e+04
TotalSteps: 2.23e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.000233


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0016   0.0052   9.0125   2.2441   3.0464
ADVA:  (21436,) (35393,) 0.6056564857457689
ADV1:  0.0011098951596023587 0.00033839264858105667 0.007662239079069651 0.05227589841223168 -0.10103709245477954
ADVB:  (21662,) (353

***** Episode 60913, Mean R = -10.9  Std R = 6.4  Min R = -27.2
PolicyLoss: 1.75
Policy_Entropy: 0.218
Policy_KL: 0.00577
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 2.24e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.000388


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0038   0.0018   0.0069   9.0125   2.2441   3.0464
ADVA:  (19785,) (35293,) 0.5605927521038166
ADV1:  0.0005667315391476605 -0.00048710151854351757 0.008288859547423399 0.04543804437787324 -0.0830151344779475
ADVB:  (22252,) (35293,) 0.6304932989544669
ADV2:  0.20997526610073325 0.34713419402813145 0.4218026923745017 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8214   0.3889   1.6769 101.1911  36.8110  32.5770
***** Episode 60944, Mean R = -9.9  Std R = 6.3  Min R = -29.0
PolicyLoss: 1.69
Policy_Entropy: 0.219
Policy_KL: 0.00504
Policy_SD: 0.54
Steps: 1.18e+04
TotalSteps: 2.24e+07
VF_0_ExplainedVarNew: 0.96
VF_0_ExplainedVarOld: 0.955
VF_0_Loss : 0.000623


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4289   0.8146   2.6796 101.1911  36.8110  32.5770
***** Episode 61161, Mean R = -11.5  Std R = 5.7  Min R = -31.1
PolicyLoss: 2.59
Policy_Entropy: 0.219
Policy_KL: 0.00594
Policy_SD: 0.55
Steps: 1.16e+04
TotalSteps: 2.25e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.000175


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0022   9.0125   2.2441   3.0464
ADVA:  (19228,) (35063,) 0.5483843367652511
ADV1:  0.0 -0.0007983101015109587 0.0070342345982576816 0.118351101174691 -0.07268674611056081
ADVB:  (18323,) (35063,) 0.5225736531386361
ADV2:  0.03650118380345574 0.3005948531376449 0.45005431103366295 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0070   0.4504   1.8442 101.1911  36.8110  32.5770
***** Episode 61192, Mean R = -10.1  Std R = 5.5  Min R = -28.5
PolicyLoss: 1.76
Policy_Entropy: 0.219
Policy_KL: 0.00777
Policy_SD: 0.555
Steps: 1.17e+04
TotalSteps: 2.25e+07
VF_0

seeker_angles |    0.00    0.00 |    0.07    0.08 |   -1.00   -0.99 |    0.96    0.98
cs_angles |  0.0020  0.0024 |  0.0738  0.0780 | -0.9963 -0.9866 |  0.9643  0.9810
optical_flow | -0.0000 -0.0000 |  0.0181  0.0202 | -0.8877 -1.0209 |  1.0329  0.8983
v_err    | -0.0112 |  0.0594 | -0.4531 |  0.1104
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -15.13 |    4.57 |  -52.28 |   -7.46
steps    |     378 |      21 |     336 |     422
***** Episode 61440, Mean R = -9.4  Std R = 4.9  Min R = -22.7
PolicyLoss: 1.83
Policy_Entropy: 0.218
Policy_KL: 0.0204
Policy_SD: 0.546
Steps: 1.18e+04
TotalSteps: 2.26e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.000207


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0019   9.0125   2.2441   3.0464
ADVA:  (19910,) (35078,) 0.5675922230457837
ADV1:  0.0 -0.0006828267987493284 0.007375706613371048 0.05251617781095602 -0.0823

seeker_angles |    0.00    0.00 |    0.07    0.08 |   -1.00   -0.99 |    0.99    0.98
cs_angles |  0.0005  0.0030 |  0.0742  0.0795 | -0.9969 -0.9872 |  0.9945  0.9826
optical_flow |  0.0001 -0.0000 |  0.0200  0.0200 | -1.0773 -1.2277 |  1.2127  1.0112
v_err    | -0.0114 |  0.0602 | -0.4528 |  0.1169
landing_rewards |    9.55 |    2.08 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -15.43 |    4.29 |  -29.72 |   -8.12
steps    |     377 |      21 |     336 |     418
***** Episode 61750, Mean R = -11.0  Std R = 5.1  Min R = -23.1
PolicyLoss: 1.84
Policy_Entropy: 0.219
Policy_KL: 0.00447
Policy_SD: 0.549
Steps: 1.16e+04
TotalSteps: 2.27e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.00014


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0008   9.0125   2.2441   3.0464
ADVA:  (21433,) (34789,) 0.6160855442812383
ADV1:  0.00041036423333243137 -0.0005883460996465004 0.007715500396420843 0.0539

w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.03    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.44
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.97   -0.98 |    0.97    0.99
cs_angles |  0.0012  0.0022 |  0.0737  0.0777 | -0.9731 -0.9780 |  0.9691  0.9921
optical_flow | -0.0000 -0.0000 |  0.0206  0.0194 | -0.9654 -1.1168 |  1.1050  0.9874
v_err    | -0.0113 |  0.0601 | -0.4594 |  0.1115
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -15.18 |    4.05 |  -29.77 |   -8.57
steps    |     378 |      21 |     335 |     424
***** Episode 62060, Mean R = -11.0  Std R = 5.1  Min R = -21.6
PolicyLoss: 1.8
Policy_Entropy: 0.22
Policy_KL: 0.00691
Policy_SD: 0.544
Steps: 1.16e+04
TotalSteps: 2.

Update Cnt = 2010    ET =    794.7   Stats:  Mean, Std, Min, Max
r_f      |    7.18   -2.10   -9.75 |  176.96  177.69  209.35 | -385.02 -382.72 -396.56 |  396.18  367.68  383.47
v_f      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.11   -0.09   -0.09 |    0.10    0.10    0.11
r_i      |   30.83    6.35  -23.46 |  651.88  647.57  793.48 |-1279.13-1260.92-1304.28 | 1319.97 1330.40 1240.91
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.08   -0.10   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.39
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.45 |    2.42 |    0.01 |   25.23
thrust   |   -0.01   -0.00    0.00 |    0.67    0.67    0.68 |   -3.29   -3.42   -3.36 |    3.32    3.46    3.40
norm_thrust |    0.91 |    0.73 |    0.00 |    3.46
fuel     |    1.51 |    0.18 |    1.05 |    2.23
rewards  |  -10.04 |    5.36 |  -32.95 |   -1.63
fuel_rewards |   -4.32 |    0.53 |   -6.39 |   -3.04
glideslope_rewards |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   8.0514   4.0767  16.9936 101.1911  36.8110  32.5770
***** Episode 62618, Mean R = -10.8  Std R = 4.8  Min R = -25.1
PolicyLoss: 2.6
Policy_Entropy: 0.22
Policy_KL: 0.00594
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 2.31e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.000112


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0003   0.0014   9.0125   2.2441   3.0464
ADVA:  (17857,) (35051,) 0.5094576474280335
ADV1:  0.0003876762300120606 -0.00021465623129247882 0.008170324240444539 0.04370863494535582 -0.07245272068287462
ADVB:  (20887,) (35051,) 0.5959031126073436
ADV2:  0.23679433039275527 0.47317178762566575 0.5798565673291189 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.1481   1.2990   6.0548 101.1911  36.8110  32.5770
***** Episode 62649, Mean R = -10.1  Std R = 5.8  Min R = -25.6
PolicyLoss: 2.43
Policy_Entropy: 0.22
Policy_KL: 0.00572
Policy_SD: 0.54
Steps: 1.17e+04
TotalSte

ADVA:  (21587,) (34851,) 0.6194083383547101
ADV1:  0.0010749348353854757 -1.8128240267286237e-05 0.007931412593260872 0.049673261287661086 -0.056539908125933125
ADVB:  (21780,) (34851,) 0.624946199535164
ADV2:  0.18432272586872067 0.325685625903719 0.4027361895110143 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4717   0.1751   0.8090 101.1911  36.8110  32.5770
***** Episode 62866, Mean R = -10.1  Std R = 5.4  Min R = -22.5
PolicyLoss: 1.6
Policy_Entropy: 0.22
Policy_KL: 0.00613
Policy_SD: 0.544
Steps: 1.18e+04
TotalSteps: 2.31e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.00022


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0012   0.0043   9.0125   2.2441   3.0464
ADVA:  (21515,) (35094,) 0.6130677608708042
ADV1:  0.0010294629553986023 -0.00020402526438269733 0.008278784463518082 0.05925274319218021 -0.06783673248667649
ADVB:  (22099,) (35094,) 0.6297087821279991
ADV2:  0.18539752983876462 0.3223375265562978 0.391806520483787

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0040   9.0125   2.2441   3.0464
ADVA:  (19151,) (34863,) 0.5493216303817801
ADV1:  0.0014440230916081369 0.0005512534443656981 0.008011404876747726 0.04491471938564556 -0.0538130469341969
ADVB:  (23851,) (34863,) 0.6841350428821387
ADV2:  0.30419952159529945 0.42792534796048637 0.483697509226595 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6796   0.2220   1.0931 101.1911  36.8110  32.5770
***** Episode 63114, Mean R = -9.7  Std R = 5.2  Min R = -24.0
PolicyLoss: 1.91
Policy_Entropy: 0.221
Policy_KL: 0.00465
Policy_SD: 0.536
Steps: 1.14e+04
TotalSteps: 2.32e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.000237


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0010   0.0036   9.0125   2.2441   3.0464
ADVA:  (18510,) (34901,) 0.53035729635254
ADV1:  0.0015485936388477672 0.0010551466421747896 0.006975725498240364 0.0387751070748219 -0.054498018821080525
ADVB:  (24

***** Episode 63331, Mean R = -11.0  Std R = 5.0  Min R = -22.8
PolicyLoss: 1.8
Policy_Entropy: 0.221
Policy_KL: 0.00738
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 2.33e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.000126


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0023   9.0125   2.2441   3.0464
ADVA:  (19851,) (35136,) 0.5649760928961749
ADV1:  0.0011637586688541587 0.0005272788786274281 0.0073831797537931955 0.06664947238632712 -0.06623391845638865
ADVB:  (22415,) (35136,) 0.6379496812386156
ADV2:  0.2540014737035883 0.41558864063606543 0.48616256770920463 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5381   0.2245   0.9541 101.1911  36.8110  32.5770
***** Episode 63362, Mean R = -10.5  Std R = 4.5  Min R = -22.8
PolicyLoss: 1.99
Policy_Entropy: 0.221
Policy_KL: 0.00648
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 2.33e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.000128


ValFun  G

theta_cv |    0.22 |    0.25 |    0.00 |    1.43
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.97   -0.97 |    1.00    0.98
cs_angles |  0.0016  0.0002 |  0.0747  0.0745 | -0.9729 -0.9718 |  0.9954  0.9845
optical_flow | -0.0001  0.0000 |  0.0195  0.0189 | -1.1211 -0.9738 |  1.0440  0.9740
v_err    | -0.0112 |  0.0601 | -0.4562 |  0.1091
landing_rewards |    9.16 |    2.77 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.03
tracking_rewards |  -15.20 |    3.97 |  -30.16 |   -7.81
steps    |     377 |      21 |     331 |     417
***** Episode 63610, Mean R = -8.4  Std R = 3.5  Min R = -17.4
PolicyLoss: 2.81
Policy_Entropy: 0.221
Policy_KL: 0.00457
Policy_SD: 0.542
Steps: 1.17e+04
TotalSteps: 2.34e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.000479


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0024   9.0125   2.2441   3.0464
ADVA:  (20431,) (34945,) 0.5846616111031621
ADV1:  0.0 -0.001079430253559458

theta_cv |    0.22 |    0.26 |    0.00 |    1.45
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -0.96   -0.93 |    0.99    0.99
cs_angles |  0.0007 -0.0019 |  0.0762  0.0757 | -0.9580 -0.9320 |  0.9866  0.9897
optical_flow |  0.0001  0.0001 |  0.0187  0.0194 | -0.8506 -1.1532 |  1.1563  1.0665
v_err    | -0.0107 |  0.0587 | -0.4532 |  0.1048
landing_rewards |    9.19 |    2.72 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.02
tracking_rewards |  -15.28 |    4.28 |  -34.75 |   -7.82
steps    |     379 |      20 |     332 |     418
***** Episode 63920, Mean R = -11.1  Std R = 7.1  Min R = -31.4
PolicyLoss: 2.45
Policy_Entropy: 0.22
Policy_KL: 0.00482
Policy_SD: 0.54
Steps: 1.18e+04
TotalSteps: 2.35e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.000353


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0011   0.0042   9.0125   2.2441   3.0464
ADVA:  (19045,) (35319,) 0.5392281774682183
ADV1:  0.0 -0.000907315635640778

attitude |   -0.03    0.03    0.05 |    1.10    0.67    1.80 |   -3.14   -1.57   -3.14 |    3.14    1.55    3.14
w        |   -0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.03    0.09 |    0.68    1.79 |   -1.45   -3.09 |    1.49    3.12
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.02   -0.04 |    0.02    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.27 |    0.00 |    1.53
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.96   -0.99 |    0.93    0.94
cs_angles |  0.0017  0.0031 |  0.0779  0.0817 | -0.9593 -0.9877 |  0.9322  0.9413
optical_flow |  0.0000 -0.0002 |  0.0179  0.0179 | -1.0050 -1.0890 |  1.0154  0.7762
v_err    | -0.0110 |  0.0595 | -0.4549 |  0.1093
landing_rewards |    8.84 |    3.20 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (21236,) (35347,) 0.6007864882451127
ADV1:  0.00273651763988725 0.0018108179727059088 0.007803871236379138 0.05849881329557588 -0.06146548323206308
ADVB:  (25776,) (35347,) 0.729227374317481
ADV2:  0.3940523440111304 0.48784105791837823 0.4933756400293725 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7805   0.3354   1.5022 101.1911  36.8110  32.5770
Update Cnt = 2080    ET =    841.0   Stats:  Mean, Std, Min, Max
r_f      |  -20.15    3.93   -5.37 |  183.94  172.43  197.65 | -394.15 -380.72 -391.42 |  390.45  384.75  362.11
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.09    0.10    0.11
r_i      |  -30.99   23.24  -59.79 |  683.82  646.82  770.79 |-1295.79-1284.94-1294.33 | 1346.85 1294.43 1331.46
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.01 |    0.34
norm_vf  |    0.08 |    0.01 |    0.02 |    0.13
gs_f     | 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0040   9.0125   2.2441   3.0464
ADVA:  (22106,) (35154,) 0.6288331342094783
ADV1:  0.0 -0.0009922734333673532 0.008402704118526961 0.05305917556043821 -0.04892213746945799
ADVB:  (16837,) (35154,) 0.47894976389600047
ADV2:  0.0 0.2630445910051768 0.4240061330722983 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6348   0.2324   1.1399 101.1911  36.8110  32.5770
***** Episode 64788, Mean R = -10.5  Std R = 4.5  Min R = -20.4
PolicyLoss: 1.67
Policy_Entropy: 0.222
Policy_KL: 0.00801
Policy_SD: 0.546
Steps: 1.19e+04
TotalSteps: 2.39e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000205


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0016   0.0063   9.0125   2.2441   3.0464
ADVA:  (22218,) (35247,) 0.6303515192782364
ADV1:  0.0014094851347230906 0.00021439398108573317 0.00933116080538793 0.05305917556043821 -0.062228057520680634
ADVB:  (21684,) (35247,) 0.615201293

***** Episode 65005, Mean R = -10.7  Std R = 5.2  Min R = -26.2
PolicyLoss: 1.97
Policy_Entropy: 0.221
Policy_KL: 0.00713
Policy_SD: 0.542
Steps: 1.19e+04
TotalSteps: 2.4e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.000117


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0005   9.0125   2.2441   3.0464
ADVA:  (18067,) (35503,) 0.5088865729656649
ADV1:  1.988537102055009e-05 8.084574156248808e-05 0.0055217286252090885 0.07912630304872426 -0.08185221396500508
ADVB:  (19874,) (35503,) 0.559783680252373
ADV2:  0.12403661377563882 0.40962240000427763 0.5693916927459827 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3790   0.1404   0.6411 101.1911  36.8110  32.5770
***** Episode 65036, Mean R = -10.0  Std R = 5.1  Min R = -25.9
PolicyLoss: 2.22
Policy_Entropy: 0.221
Policy_KL: 0.00803
Policy_SD: 0.546
Steps: 1.18e+04
TotalSteps: 2.4e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.000275


ValFun  Grad

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9534   0.3934   1.6193 101.1911  36.8110  32.5770
***** Episode 65253, Mean R = -11.3  Std R = 6.8  Min R = -28.8
PolicyLoss: 2.08
Policy_Entropy: 0.222
Policy_KL: 0.00419
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 2.41e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 8.9e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   9.0125   2.2441   3.0464
ADVA:  (18947,) (35275,) 0.5371226080793763
ADV1:  0.0002972037526022594 -6.674644785844473e-05 0.006349982431694628 0.050745843543664904 -0.0639138550095309
ADVB:  (21055,) (35275,) 0.5968816442239546
ADV2:  0.1593139975980516 0.34988701401616257 0.4641968850005469 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0920   0.4983   1.9004 101.1911  36.8110  32.5770
***** Episode 65284, Mean R = -9.7  Std R = 3.7  Min R = -17.5
PolicyLoss: 1.77
Policy_Entropy: 0.223
Policy_KL: 0.00771
Policy_SD: 0.546
Steps: 1.18e+04
TotalSte

ADVA:  (20606,) (35153,) 0.5861804113446932
ADV1:  0.0 -0.0003123245991292387 0.00669701413289852 0.041055163089373625 -0.06419499946979468
ADVB:  (17988,) (35153,) 0.5117059710408784
ADV2:  0.02339225869075543 0.32207295290073834 0.47795673506841074 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3136   0.1030   0.5326 101.1911  36.8110  32.5770
***** Episode 65501, Mean R = -9.6  Std R = 5.8  Min R = -28.9
PolicyLoss: 1.9
Policy_Entropy: 0.223
Policy_KL: 0.00674
Policy_SD: 0.54
Steps: 1.18e+04
TotalSteps: 2.41e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000106


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0018   0.0065   9.0125   2.2441   3.0464
ADVA:  (22221,) (35403,) 0.6276586729938141
ADV1:  6.374857254248106e-05 -3.758009124998246e-05 0.007765219951015226 0.041055163089373625 -0.06419499946979468
ADVB:  (18170,) (35403,) 0.5132333418071915
ADV2:  0.02763753361152498 0.3475183273036699 0.5070902864515487 3.0 0.0
Policy 

theta_cv |    0.22 |    0.26 |    0.00 |    1.35
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.99 |    0.99    1.00
cs_angles |  0.0025  0.0020 |  0.0761  0.0766 | -0.9752 -0.9902 |  0.9892  0.9983
optical_flow |  0.0001  0.0000 |  0.0191  0.0208 | -1.0104 -1.0407 |  0.9663  1.1241
v_err    | -0.0116 |  0.0592 | -0.4533 |  0.1179
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -15.40 |    4.31 |  -33.11 |   -7.34
steps    |     380 |      20 |     336 |     416
***** Episode 65780, Mean R = -11.7  Std R = 5.8  Min R = -25.9
PolicyLoss: 1.4
Policy_Entropy: 0.223
Policy_KL: 0.00646
Policy_SD: 0.544
Steps: 1.18e+04
TotalSteps: 2.43e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.961
VF_0_Loss : 0.00024


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0012   0.0043   9.0125   2.2441   3.0464
ADVA:  (21040,) (35309,) 0.595882069727265
ADV1:  0.0012931179343305416 -6.8

w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.02    0.04 |    0.65    1.81 |   -1.42   -3.14 |    1.41    3.10
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.02 |    0.03    0.04    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.57
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.95   -0.99 |    1.00    0.99
cs_angles |  0.0015  0.0024 |  0.0736  0.0767 | -0.9467 -0.9872 |  0.9992  0.9939
optical_flow |  0.0001 -0.0000 |  0.0183  0.0192 | -0.9842 -0.9356 |  0.8765  1.1726
v_err    | -0.0114 |  0.0594 | -0.4525 |  0.1001
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.02
tracking_rewards |  -15.05 |    4.43 |  -33.86 |   -8.40
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5068   0.2189   0.9886 101.1911  36.8110  32.5770
Update Cnt = 2140    ET =    881.6   Stats:  Mean, Std, Min, Max
r_f      |   -3.86   -0.17   22.14 |  180.31  159.76  198.69 | -371.93 -361.76 -377.06 |  375.75  369.84  387.51
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.10    0.09
r_i      |  -27.99   -0.25   74.74 |  698.64  623.67  770.07 |-1324.84-1232.30-1309.49 | 1330.86 1219.80 1320.83
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.02 |    0.33
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.32 |    1.69 |    0.01 |   12.89
thrust   |    0.00    0.00    0.01 |    0.66    0.66    0.66 |   -3.30   -3.00   -3.45 |    3.46    3.41    3.45
norm_thrust |    0.88 |    0.73 |    0.00 |    3.46
fuel     |    1.48 |    0.20 |    1.09 |    2.47
rewards  |   -9.33 

ADVA:  (21124,) (34972,) 0.6040260780052613
ADV1:  0.0003947783927284584 -0.00015344330774689867 0.007362686246931219 0.05895376071040331 -0.07843183903161915
ADVB:  (19423,) (34972,) 0.5553871668763583
ADV2:  0.08971135814947442 0.3165958450598682 0.4596470496618551 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6352   0.2297   1.1252 101.1911  36.8110  32.5770
***** Episode 66648, Mean R = -9.4  Std R = 5.4  Min R = -28.8
PolicyLoss: 1.72
Policy_Entropy: 0.225
Policy_KL: 0.00721
Policy_SD: 0.535
Steps: 1.17e+04
TotalSteps: 2.46e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000345


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0008   9.0125   2.2441   3.0464
ADVA:  (21173,) (34800,) 0.6084195402298851
ADV1:  0.0003401121369078436 -0.0002872383711761121 0.007453335754381383 0.037788817382125583 -0.07843183903161915
ADVB:  (19034,) (34800,) 0.5469540229885057
ADV2:  0.06753141733271988 0.2899092499618382 0.4164735588444

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0057   9.0125   2.2441   3.0464
ADVA:  (18405,) (34811,) 0.5287121886759932
ADV1:  0.001087069906072033 0.0010711593410890514 0.0066625854526017264 0.08009002520990613 -0.13261860402363285
ADVB:  (22257,) (34811,) 0.6393668667949786
ADV2:  0.28771172512173665 0.5010124369671772 0.6066697169843853 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9583   0.4330   2.0270 101.1911  36.8110  32.5770
***** Episode 66896, Mean R = -9.6  Std R = 4.4  Min R = -18.6
PolicyLoss: 2.35
Policy_Entropy: 0.226
Policy_KL: 0.00764
Policy_SD: 0.54
Steps: 1.16e+04
TotalSteps: 2.47e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.000183


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0018   0.0065   9.0125   2.2441   3.0464
ADVA:  (19068,) (34826,) 0.5475219663469821
ADV1:  -0.0001003561756895378 -0.0006571673121082027 0.006579000947547078 0.08009002520990613 -0.13261860402363285
ADVB:

***** Episode 67113, Mean R = -10.0  Std R = 5.8  Min R = -24.8
PolicyLoss: 1.69
Policy_Entropy: 0.224
Policy_KL: 0.0062
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 2.48e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 8.64e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0028   9.0125   2.2441   3.0464
ADVA:  (20278,) (35241,) 0.5754093243665049
ADV1:  0.0018331568046896542 0.0009637179406940535 0.007545468369815355 0.04792055842621945 -0.07119721998427114
ADVB:  (24582,) (35241,) 0.6975397973950795
ADV2:  0.3490922329030787 0.4633416374777569 0.4995154699106485 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2917   0.1143   0.5433 101.1911  36.8110  32.5770
***** Episode 67144, Mean R = -8.2  Std R = 3.3  Min R = -17.4
PolicyLoss: 2.01
Policy_Entropy: 0.225
Policy_KL: 0.00503
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 2.48e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 7.6e-05


ValFun  Gradie

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3336   0.1132   0.5973 101.1911  36.8110  32.5770
***** Episode 67361, Mean R = -9.4  Std R = 5.3  Min R = -23.5
PolicyLoss: 1.47
Policy_Entropy: 0.225
Policy_KL: 0.00592
Policy_SD: 0.528
Steps: 1.18e+04
TotalSteps: 2.48e+07
VF_0_ExplainedVarNew: 0.949
VF_0_ExplainedVarOld: 0.946
VF_0_Loss : 0.000676


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0053   0.0033   0.0125   9.0125   2.2441   3.0464
ADVA:  (20602,) (34962,) 0.5892683484926492
ADV1:  0.0014111267806975062 0.0007864162521570778 0.008790870090076492 0.06157623203657464 -0.059897961729535876
ADVB:  (22502,) (34962,) 0.6436130656140953
ADV2:  0.26400341901407554 0.456302717042678 0.5307326858354876 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5281   0.2391   1.1447 101.1911  36.8110  32.5770
***** Episode 67392, Mean R = -10.5  Std R = 4.9  Min R = -23.8
PolicyLoss: 2.15
Policy_Entropy: 0.225
Policy_KL: 0.00777
Policy_SD: 0.533
Steps: 1.16e+04
TotalSte

attitude |    0.04   -0.01   -0.05 |    1.12    0.69    1.82 |   -3.14   -1.53   -3.14 |    3.14    1.54    3.14
w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.01    0.00 |    0.68    1.82 |   -1.37   -3.13 |    1.51    3.14
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.02    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.26 |    0.00 |    1.65
seeker_angles |    0.00    0.00 |    0.08    0.07 |   -0.99   -0.99 |    0.88    0.99
cs_angles |  0.0031  0.0023 |  0.0760  0.0731 | -0.9918 -0.9856 |  0.8847  0.9938
optical_flow | -0.0001  0.0000 |  0.0184  0.0192 | -1.2112 -1.0569 |  0.9979  1.1613
v_err    | -0.0113 |  0.0596 | -0.4557 |  0.1139
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0539   0.3977   1.6697 101.1911  36.8110  32.5770
Update Cnt = 2190    ET =   1379.6   Stats:  Mean, Std, Min, Max
r_f      |    1.92   -2.66   -3.52 |  192.50  153.98  208.45 | -376.15 -344.86 -395.51 |  398.81  333.80  393.71
v_f      |   -0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.10   -0.11   -0.10 |    0.10    0.10    0.12
r_i      |    5.53   36.30   14.43 |  691.91  644.22  769.50 |-1371.69-1265.50-1300.71 | 1321.78 1292.64 1309.19
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.02 |    0.31
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.31 |    2.56 |    0.01 |   38.49
thrust   |   -0.00   -0.00    0.01 |    0.66    0.68    0.67 |   -3.41   -3.41   -3.40 |    3.43    3.35    3.41
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.48 |    0.21 |    1.08 |    2.31
rewards  |  -10.32 

ADVA:  (20494,) (35229,) 0.5817366374293905
ADV1:  0.00046388826214884167 -0.0004747485536903537 0.008353328127399343 0.07354012291975442 -0.08550034800437395
ADVB:  (21148,) (35229,) 0.6003008884725652
ADV2:  0.1606995863222566 0.3403610830487738 0.450397397950505 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4737   0.1760   0.8696 101.1911  36.8110  32.5770
***** Episode 68198, Mean R = -9.6  Std R = 5.6  Min R = -29.1
PolicyLoss: 1.71
Policy_Entropy: 0.225
Policy_KL: 0.00486
Policy_SD: 0.528
Steps: 1.19e+04
TotalSteps: 2.52e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.00142


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   9.0125   2.2441   3.0464
ADVA:  (19073,) (35402,) 0.5387548726060675
ADV1:  0.0004749920408813231 9.834816604858551e-05 0.00743279354358155 0.046534670429603575 -0.08550034800437395
ADVB:  (21421,) (35402,) 0.6050788091068301
ADV2:  0.19385170542648197 0.4039546386243615 0.523565356566624 3.

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0041   0.0026   0.0090   9.0125   2.2441   3.0464
ADVA:  (22103,) (35231,) 0.627373619823451
ADV1:  0.00023603558582845074 -0.000832728607289257 0.008373459157821873 0.056521870555171994 -0.07041704505177972
ADVB:  (18499,) (35231,) 0.5250773466549346
ADV2:  0.03575868371362787 0.2421970418814788 0.3683251763098916 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3565   0.1534   0.7737 101.1911  36.8110  32.5770
***** Episode 68446, Mean R = -12.4  Std R = 6.6  Min R = -28.8
PolicyLoss: 1.38
Policy_Entropy: 0.225
Policy_KL: 0.0064
Policy_SD: 0.542
Steps: 1.16e+04
TotalSteps: 2.53e+07
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.959
VF_0_Loss : 0.000341


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0016   0.0063   9.0125   2.2441   3.0464
ADVA:  (21807,) (35157,) 0.6202747674716272
ADV1:  0.001164126748646686 -1.8210666095064797e-05 0.008804603926442897 0.04321305283090987 -0.07041704505177972
ADVB

***** Episode 68663, Mean R = -11.5  Std R = 6.5  Min R = -26.7
PolicyLoss: 1.38
Policy_Entropy: 0.226
Policy_KL: 0.00648
Policy_SD: 0.542
Steps: 1.15e+04
TotalSteps: 2.53e+07
VF_0_ExplainedVarNew: 0.962
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.0001


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0012   0.0046   9.0125   2.2441   3.0464
ADVA:  (21861,) (34974,) 0.6250643335048893
ADV1:  0.000559255766382126 -0.0005571504050967958 0.008415886011541782 0.034795877525902935 -0.0633675691648056
ADVB:  (20149,) (34974,) 0.5761136844513067
ADV2:  0.12553253760943578 0.3076911087530391 0.4012689673248587 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6252   0.2018   1.1250 101.1911  36.8110  32.5770
***** Episode 68694, Mean R = -9.7  Std R = 5.8  Min R = -25.4
PolicyLoss: 1.6
Policy_Entropy: 0.226
Policy_KL: 0.00734
Policy_SD: 0.535
Steps: 1.17e+04
TotalSteps: 2.54e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 5.47e-05


ValFun  Gradien

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4463   0.1667   0.7375 101.1911  36.8110  32.5770
***** Episode 68911, Mean R = -9.2  Std R = 5.2  Min R = -23.8
PolicyLoss: 1.87
Policy_Entropy: 0.225
Policy_KL: 0.00841
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 2.54e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000257


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0015   9.0125   2.2441   3.0464
ADVA:  (21317,) (34948,) 0.6099633741558887
ADV1:  0.00016855882237600244 -0.0005937864215342254 0.007249166319307523 0.04237068799980276 -0.0667378481836815
ADVB:  (19275,) (34948,) 0.5515337072221587
ADV2:  0.08542527308134752 0.29398759598120616 0.4132115919081931 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5802   0.1736   1.0467 101.1911  36.8110  32.5770
***** Episode 68942, Mean R = -9.6  Std R = 4.5  Min R = -22.1
PolicyLoss: 1.6
Policy_Entropy: 0.225
Policy_KL: 0.00778
Policy_SD: 0.541
Steps: 1.15e+04
TotalSte

attitude |    0.01    0.10    0.14 |    1.22    0.67    1.83 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.10    0.17 |    0.68    1.84 |   -1.53   -3.11 |    1.47    3.14
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.02 |    0.03    0.03    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.26 |    0.00 |    1.92
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.98   -0.99 |    0.99    0.98
cs_angles |  0.0032  0.0033 |  0.0741  0.0773 | -0.9833 -0.9901 |  0.9916  0.9783
optical_flow |  0.0000  0.0000 |  0.0199  0.0198 | -1.1628 -1.1524 |  1.2059  0.9515
v_err    | -0.0115 |  0.0596 | -0.4520 |  0.0999
landing_rewards |    9.35 |    2.46 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  20.1300  12.6166  49.2565 101.1911  36.8110  32.5770
Update Cnt = 2240    ET =   1727.3   Stats:  Mean, Std, Min, Max
r_f      |   21.69    5.02  -15.82 |  181.70  166.78  199.15 | -395.83 -356.04 -397.89 |  377.67  385.40  385.21
v_f      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.11   -0.11 |    0.10    0.11    0.10
r_i      |   71.12   33.87  -50.47 |  689.22  662.05  744.17 |-1362.32-1334.69-1292.24 | 1327.91 1230.64 1327.59
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.02 |    0.43
norm_vf  |    0.08 |    0.01 |    0.03 |    0.12
gs_f     |    1.28 |    2.19 |    0.00 |   24.24
thrust   |    0.00    0.00    0.00 |    0.67    0.67    0.66 |   -3.42   -3.40   -3.43 |    3.42    3.33    3.35
norm_thrust |    0.89 |    0.73 |    0.00 |    3.46
fuel     |    1.50 |    0.20 |    1.07 |    2.13
rewards  |   -9.98 

ADVA:  (20094,) (35451,) 0.5668105272065668
ADV1:  0.0 -0.0003598279609605761 0.006544189357436426 0.04084647285073595 -0.1298865486030824
ADVB:  (18979,) (35451,) 0.5353586640715353
ADV2:  0.055684791321630484 0.28865864036831235 0.42941854221781584 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4232   0.1561   0.7926 101.1911  36.8110  32.5770
***** Episode 69748, Mean R = -10.5  Std R = 5.6  Min R = -29.4
PolicyLoss: 1.62
Policy_Entropy: 0.225
Policy_KL: 0.00718
Policy_SD: 0.534
Steps: 1.16e+04
TotalSteps: 2.58e+07
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.000603


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0016   9.0125   2.2441   3.0464
ADVA:  (19742,) (35011,) 0.5638799234526292
ADV1:  0.0 -0.0006051994210994505 0.006541168275282149 0.04084647285073595 -0.08024846280728476
ADVB:  (19134,) (35011,) 0.5465139527577048
ADV2:  0.07185879384396998 0.29150256675740444 0.421726405932113 3.0 0.0
Policy  Gradients: u/sd/

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0030   9.0125   2.2441   3.0464
ADVA:  (20680,) (35204,) 0.5874332462220202
ADV1:  0.0009336549567699531 -5.076156035830243e-05 0.008130035941934111 0.0335320244574121 -0.05987110538732239
ADVB:  (22033,) (35204,) 0.6258663788205886
ADV2:  0.1903194898787669 0.34401435729175167 0.43800074193228944 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2254   0.0705   0.3702 101.1911  36.8110  32.5770
***** Episode 69996, Mean R = -10.2  Std R = 6.4  Min R = -30.2
PolicyLoss: 1.65
Policy_Entropy: 0.226
Policy_KL: 0.00716
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 2.58e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.00017


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0010   9.0125   2.2441   3.0464
ADVA:  (22817,) (35151,) 0.649113823219823
ADV1:  0.00046880660402865565 -0.0009465024916492349 0.00925888145343712 0.04042969416818182 -0.05987110538732239
ADVB:

***** Episode 70213, Mean R = -10.7  Std R = 6.5  Min R = -38.6
PolicyLoss: 1.82
Policy_Entropy: 0.226
Policy_KL: 0.00638
Policy_SD: 0.53
Steps: 1.18e+04
TotalSteps: 2.59e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.000176


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0037   9.0125   2.2441   3.0464
ADVA:  (21199,) (35136,) 0.6033413023679417
ADV1:  0.0013636963224215159 0.0004906281378190239 0.00827682294818434 0.08967712491438451 -0.1114708173832325
ADVB:  (22117,) (35136,) 0.6294683515482696
ADV2:  0.2429753280010993 0.40366414843842374 0.4747281627332426 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4591   0.2132   0.9149 101.1911  36.8110  32.5770
***** Episode 70244, Mean R = -10.2  Std R = 4.9  Min R = -22.6
PolicyLoss: 1.93
Policy_Entropy: 0.226
Policy_KL: 0.0063
Policy_SD: 0.534
Steps: 1.16e+04
TotalSteps: 2.59e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.000164


ValFun  Gradie

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.7318   1.4822   5.2896 101.1911  36.8110  32.5770
***** Episode 70461, Mean R = -10.8  Std R = 6.3  Min R = -29.4
PolicyLoss: 1.21
Policy_Entropy: 0.226
Policy_KL: 0.00757
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 2.6e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.000115


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0014   0.0053   9.0125   2.2441   3.0464
ADVA:  (20216,) (35105,) 0.5758723828514457
ADV1:  0.001885979235153561 0.0005928122928831887 0.00859527841619649 0.05616813178902952 -0.07504770683200657
ADVB:  (25590,) (35105,) 0.7289559891753311
ADV2:  0.35923452954938684 0.44120023366171546 0.44915403990281394 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.0847   0.9844   3.6435 101.1911  36.8110  32.5770
***** Episode 70492, Mean R = -10.2  Std R = 5.6  Min R = -27.2
PolicyLoss: 1.83
Policy_Entropy: 0.226
Policy_KL: 0.00569
Policy_SD: 0.533
Steps: 1.18e+04
TotalSte

attitude |    0.03    0.00    0.17 |    1.24    0.68    1.90 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.01    0.11 |    0.68    1.91 |   -1.47   -3.13 |    1.55    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.42
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.92   -0.99 |    0.99    0.93
cs_angles |  0.0009  0.0038 |  0.0733  0.0788 | -0.9155 -0.9879 |  0.9903  0.9282
optical_flow |  0.0001 -0.0001 |  0.0196  0.0192 | -1.0704 -1.0544 |  1.0496  1.0052
v_err    | -0.0116 |  0.0600 | -0.4559 |  0.1054
landing_rewards |    9.13 |    2.82 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (20298,) (35289,) 0.5751934030434414
ADV1:  0.0012173998582889478 0.00012715463779708417 0.008200808969479428 0.042640557323469275 -0.05853249728507531
ADVB:  (23505,) (35289,) 0.6660715803791549
ADV2:  0.2663736947429444 0.39579620027916934 0.4578699654395492 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5233   0.2176   0.9989 101.1911  36.8110  32.5770
Update Cnt = 2290    ET =   1481.0   Stats:  Mean, Std, Min, Max
r_f      |  -12.86  -12.34    0.25 |  173.24  177.77  208.00 | -399.10 -374.54 -389.41 |  389.11  380.40  381.61
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.12   -0.10   -0.09 |    0.09    0.09    0.10
r_i      |    6.33  -36.85   22.60 |  649.92  680.53  777.07 |-1307.43-1358.54-1256.69 | 1287.85 1366.40 1354.53
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.10    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.02 |    0.36
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f  

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0030   9.0125   2.2441   3.0464
ADVA:  (20913,) (35226,) 0.5936808039516266
ADV1:  0.00043853962140116744 6.81076765541097e-05 0.006625445958018785 0.06259181653034962 -0.07022716480676033
ADVB:  (19628,) (35226,) 0.5572020666553115
ADV2:  0.09717890797397133 0.3367164569093264 0.47925463387444334 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2853   0.5956   2.9934 101.1911  36.8110  32.5770
***** Episode 71298, Mean R = -9.0  Std R = 4.7  Min R = -21.0
PolicyLoss: 1.8
Policy_Entropy: 0.227
Policy_KL: 0.00685
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 2.63e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.000131


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0013   9.0125   2.2441   3.0464
ADVA:  (21475,) (35384,) 0.6069127289170246
ADV1:  0.0 -0.0004943345742735206 0.006587151701681051 0.06259181653034962 -0.07022716480676033
ADVB:  (17313,) (35384,

***** Episode 71515, Mean R = -9.0  Std R = 4.7  Min R = -19.6
PolicyLoss: 1.71
Policy_Entropy: 0.228
Policy_KL: 0.0048
Policy_SD: 0.525
Steps: 1.19e+04
TotalSteps: 2.64e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.000356


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0015   9.0125   2.2441   3.0464
ADVA:  (18483,) (35693,) 0.5178326282464348
ADV1:  0.0013760376182929196 0.0009808876558474345 0.006636158149199702 0.04332973276840124 -0.0802657960368906
ADVB:  (25517,) (35693,) 0.7149020816406578
ADV2:  0.36694212280844773 0.5021566132222453 0.5500929038098087 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4131   0.1555   0.7329 101.1911  36.8110  32.5770
***** Episode 71546, Mean R = -9.2  Std R = 3.7  Min R = -18.7
PolicyLoss: 2.1
Policy_Entropy: 0.228
Policy_KL: 0.00463
Policy_SD: 0.522
Steps: 1.19e+04
TotalSteps: 2.64e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.000139


ValFun  Gradient

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4249   0.1774   0.9192 101.1911  36.8110  32.5770
***** Episode 71763, Mean R = -9.0  Std R = 4.8  Min R = -18.7
PolicyLoss: 1.7
Policy_Entropy: 0.228
Policy_KL: 0.00707
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 2.65e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000261


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0024   9.0125   2.2441   3.0464
ADVA:  (22500,) (35191,) 0.6393680202324458
ADV1:  0.0015193267026443677 0.00041416106573432897 0.007962987706852076 0.04921457643562638 -0.09582070569487144
ADVB:  (23183,) (35191,) 0.6587763916910574
ADV2:  0.23861014487328902 0.3605425518747835 0.4217293638866611 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2207   0.0848   0.4154 101.1911  36.8110  32.5770
***** Episode 71794, Mean R = -9.0  Std R = 4.1  Min R = -18.8
PolicyLoss: 1.64
Policy_Entropy: 0.228
Policy_KL: 0.00568
Policy_SD: 0.522
Steps: 1.17e+04
TotalStep

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4022   0.1280   0.6524 101.1911  36.8110  32.5770
***** Episode 72011, Mean R = -10.8  Std R = 5.4  Min R = -23.7
PolicyLoss: 1.49
Policy_Entropy: 0.227
Policy_KL: 0.00623
Policy_SD: 0.525
Steps: 1.18e+04
TotalSteps: 2.66e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000108


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0002   0.0009   9.0125   2.2441   3.0464
ADVA:  (23182,) (35229,) 0.6580374123591359
ADV1:  0.0007897860010308878 2.2915001771340344e-05 0.00717566475243128 0.03654213932971451 -0.05748066290692208
ADVB:  (20391,) (35229,) 0.5788129098186153
ADV2:  0.11077540692106887 0.2843762848051583 0.39674758546723277 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4296   0.1719   0.7572 101.1911  36.8110  32.5770
***** Episode 72042, Mean R = -8.7  Std R = 4.8  Min R = -20.0
Policy

attitude |   -0.13    0.01    0.00 |    1.19    0.68    1.85 |   -3.14   -1.56   -3.14 |    3.14    1.55    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.00    0.04 |    0.69    1.86 |   -1.40   -3.12 |    1.42    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.03    0.02    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.51
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -0.93   -1.00 |    1.00    0.96
cs_angles |  0.0019 -0.0003 |  0.0771  0.0756 | -0.9261 -0.9973 |  0.9958  0.9589
optical_flow |  0.0000  0.0001 |  0.0204  0.0203 | -0.9933 -1.0292 |  0.9756  1.0858
v_err    | -0.0114 |  0.0597 | -0.4564 |  0.1027
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4110   0.5747   2.5865 101.1911  36.8110  32.5770
Update Cnt = 2340    ET =   1401.5   Stats:  Mean, Std, Min, Max
r_f      |   -0.85   13.24    0.09 |  179.48  166.31  207.05 | -390.69 -387.82 -388.31 |  389.16  370.56  389.80
v_f      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.12   -0.10 |    0.10    0.10    0.10
r_i      |   27.10   77.82   -4.50 |  640.16  657.37  800.03 |-1251.72-1279.70-1304.17 | 1363.14 1288.52 1291.51
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.08    0.09    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.38
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.31 |    1.49 |    0.01 |   10.82
thrust   |   -0.00    0.00    0.00 |    0.66    0.67    0.66 |   -3.38   -3.38   -3.43 |    3.43    3.39    3.41
norm_thrust |    0.88 |    0.73 |    0.00 |    3.46
fuel     |    1.47 |    0.18 |    1.07 |    2.01
rewards  |   -9.87 

ADVA:  (23217,) (35472,) 0.6545162381596752
ADV1:  0.0018750276853392225 0.00019333424885253854 0.009178044515773611 0.04679423069677535 -0.06721942127984631
ADVB:  (23146,) (35472,) 0.6525146594497068
ADV2:  0.2290402747288563 0.34444312264702237 0.40631796582750773 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6656   0.7314   3.4640 101.1911  36.8110  32.5770
***** Episode 72848, Mean R = -10.1  Std R = 5.1  Min R = -23.4
PolicyLoss: 1.59
Policy_Entropy: 0.226
Policy_KL: 0.0306
Policy_SD: 0.519
Steps: 1.17e+04
TotalSteps: 2.69e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.00115


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0017   0.0065   9.0125   2.2441   3.0464
ADVA:  (22786,) (35182,) 0.6476607356034336
ADV1:  0.0016274064297732756 0.00014865686548752797 0.00916125109694279 0.06033436841739559 -0.05507805857638586
ADVB:  (22992,) (35182,) 0.6535160025012791
ADV2:  0.2234420610313967 0.35175985732846005 0.4207775689994574

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0001   0.0005   9.0125   2.2441   3.0464
ADVA:  (20460,) (34962,) 0.5852067959498884
ADV1:  0.0006284068865014952 -0.00041783924513805583 0.007845863176567767 0.061706036830632416 -0.0761702788234731
ADVB:  (21736,) (34962,) 0.6217035638693439
ADV2:  0.17765591639582584 0.3190210885144753 0.41314419916382367 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4679   0.2068   0.7706 101.1911  36.8110  32.5770
***** Episode 73096, Mean R = -10.3  Std R = 5.3  Min R = -25.5
PolicyLoss: 1.53
Policy_Entropy: 0.228
Policy_KL: 0.00523
Policy_SD: 0.523
Steps: 1.17e+04
TotalSteps: 2.7e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.000155


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0034   9.0125   2.2441   3.0464
ADVA:  (20497,) (34944,) 0.5865670787545788
ADV1:  0.00090861954814

***** Episode 73313, Mean R = -8.2  Std R = 3.8  Min R = -16.0
PolicyLoss: 1.82
Policy_Entropy: 0.229
Policy_KL: 0.0063
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 2.71e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00014


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0022   0.0083   9.0125   2.2441   3.0464
ADVA:  (19550,) (35287,) 0.5540283957264716
ADV1:  0.0005850711094422438 -0.00015631954789822533 0.007514258653171909 0.0354045929996728 -0.07969174530548451
ADVB:  (22075,) (35287,) 0.6255844928727293
ADV2:  0.20270283292438848 0.3703932161216732 0.4678752868965902 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4537   0.2041   0.8314 101.1911  36.8110  32.5770
***** Episode 73344, Mean R = -9.7  Std R = 5.0  Min R = -23.3
PolicyLoss: 1.76
Policy_Entropy: 0.228
Policy_KL: 0.00492
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 2.71e+07

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4253   0.1195   0.6691 101.1911  36.8110  32.5770
***** Episode 73561, Mean R = -10.2  Std R = 4.3  Min R = -20.4
PolicyLoss: 1.59
Policy_Entropy: 0.228
Policy_KL: 0.00648
Policy_SD: 0.534
Steps: 1.16e+04
TotalSteps: 2.72e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.000229


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0009   0.0036   9.0125   2.2441   3.0464
ADVA:  (22519,) (35115,) 0.641292894774313
ADV1:  0.0015119908323192642 -0.00012971327330783513 0.009102278854904824 0.041364941251248094 -0.09379957717363274
ADVB:  (23819,) (35115,) 0.6783141107788694
ADV2:  0.24839434328087298 0.34902596579235873 0.40241565365401494 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2546   0.0763   0.3975 101.1911  36.8110  32.5770
***** Episode 73592, Mean R = -9.0  Std R = 5.0  Min R = -18.2
PolicyLoss: 1.54
Policy_Entropy: 0.229
Policy_KL: 0.00464
Policy_SD: 0.526
Steps: 1.16e+04
Tota

theta_cv |    0.21 |    0.25 |    0.00 |    1.68
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.99   -0.97 |    0.96    0.98
cs_angles |  0.0013  0.0033 |  0.0712  0.0802 | -0.9913 -0.9655 |  0.9550  0.9755
optical_flow | -0.0001  0.0001 |  0.0201  0.0195 | -1.2112 -1.1277 |  1.2013  0.9790
v_err    | -0.0113 |  0.0602 | -0.4527 |  0.1087
landing_rewards |    8.81 |    3.24 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -15.12 |    4.08 |  -30.73 |   -7.95
steps    |     377 |      20 |     334 |     421
***** Episode 73840, Mean R = -9.9  Std R = 5.1  Min R = -24.8
PolicyLoss: 1.62
Policy_Entropy: 0.229
Policy_KL: 0.00393
Policy_SD: 0.527
Steps: 1.17e+04
TotalSteps: 2.73e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.00029


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0007   9.0125  

attitude |    0.02   -0.03    0.10 |    1.18    0.68    1.82 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.03    0.04 |    0.68    1.83 |   -1.47   -3.14 |    1.49    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.01 |    0.03    0.04    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.37
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.99 |    0.96    0.99
cs_angles |  0.0018  0.0023 |  0.0778  0.0759 | -0.9794 -0.9940 |  0.9637  0.9867
optical_flow |  0.0001 -0.0001 |  0.0184  0.0193 | -0.9395 -0.9641 |  0.9877  1.0430
v_err    | -0.0110 |  0.0594 | -0.4515 |  0.1049
landing_rewards |    9.16 |    2.77 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (21793,) (34755,) 0.6270464681340814
ADV1:  0.0002764941845832579 -0.0010726750248860252 0.009378307478523475 0.039017110054000825 -0.12333274291386165
ADVB:  (19798,) (34755,) 0.5696446554452597
ADV2:  0.09727330250606742 0.28207281272195167 0.3927129665990179 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9477   0.3467   1.7586 101.1911  36.8110  32.5770
Update Cnt = 2400    ET =   1559.2   Stats:  Mean, Std, Min, Max
r_f      |    9.82    6.58    9.10 |  194.34  168.71  202.97 | -384.30 -380.83 -374.83 |  388.34  382.41  388.92
v_f      |   -0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.11   -0.11 |    0.10    0.11    0.11
r_i      |   28.49   31.25  -13.36 |  707.12  642.92  750.37 |-1323.37-1234.84-1315.60 | 1352.53 1257.18 1284.99
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.10    0.09
norm_rf  |    0.14 |    0.06 |    0.03 |    0.35
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0014   0.0052   9.0125   2.2441   3.0464
ADVA:  (22471,) (35372,) 0.6352764898790003
ADV1:  0.002064838049802037 0.0008915004840893499 0.007988861434902957 0.06425326436567602 -0.056240173300996364
ADVB:  (25001,) (35372,) 0.7068019902747936
ADV2:  0.32032727499583336 0.4109781569978701 0.4402782384192353 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4962   0.1639   0.8233 101.1911  36.8110  32.5770
***** Episode 74708, Mean R = -9.2  Std R = 5.3  Min R = -25.2
PolicyLoss: 1.75
Policy_Entropy: 0.227
Policy_KL: 0.00535
Policy_SD: 0.529
Steps: 1.17e+04
TotalSteps: 2.76e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000151


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0032   0.0020   0.0073   9.0125   2.2441   3.0464
ADVA:  (21748,) (35038,) 0.6206975283977396
ADV1:  0.0011539277046084956 -0.00030803111328520474 0.008628152922507019 0.03849547863603481 -0.0643091242467751
ADVB:

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0018   9.0125   2.2441   3.0464
ADVA:  (21501,) (34887,) 0.6163040674176627
ADV1:  0.0001226194184838214 -0.0007096134011977979 0.008041729299950617 0.08467932781953574 -0.06846931132688161
ADVB:  (19302,) (34887,) 0.5532719924327113
ADV2:  0.07277269532088892 0.270698305479272 0.4069333078727288 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4912   0.1808   0.8272 101.1911  36.8110  32.5770
***** Episode 74956, Mean R = -10.8  Std R = 6.9  Min R = -30.7
PolicyLoss: 1.46
Policy_Entropy: 0.228
Policy_KL: 0.00859
Policy_SD: 0.535
Steps: 1.15e+04
TotalSteps: 2.77e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.000551


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0012   9.0125   2.2441   3.0464
ADVA:  (21448,) (35078,) 0.6114373681509778
ADV1:  0.0008657976452313

***** Episode 75173, Mean R = -8.7  Std R = 4.4  Min R = -17.8
PolicyLoss: 2.11
Policy_Entropy: 0.227
Policy_KL: 0.00565
Policy_SD: 0.533
Steps: 1.18e+04
TotalSteps: 2.78e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000223


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0012   0.0040   9.0125   2.2441   3.0464
ADVA:  (18898,) (35450,) 0.5330888575458392
ADV1:  0.0004420029881153781 0.00013038041701509694 0.005985701672729681 0.03264478995886644 -0.06475414289293291
ADVB:  (21495,) (35450,) 0.6063469675599436
ADV2:  0.18378820086198472 0.38271550083153816 0.5002490760928755 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7216   0.2636   1.2523 101.1911  36.8110  32.5770
***** Episode 75204, Mean R = -9.1  Std R = 4.2  Min R = -21.8
PolicyLoss: 1.89
Policy_Entropy: 0.228
Policy_KL: 0.00665
Policy_SD: 0.527
Steps: 1.19e+04
TotalSteps: 2.78e

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6319   0.2579   1.2724 101.1911  36.8110  32.5770
***** Episode 75421, Mean R = -9.8  Std R = 4.9  Min R = -22.0
PolicyLoss: 1.84
Policy_Entropy: 0.228
Policy_KL: 0.0072
Policy_SD: 0.531
Steps: 1.17e+04
TotalSteps: 2.79e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000824


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0012   0.0044   9.0125   2.2441   3.0464
ADVA:  (21273,) (35175,) 0.604776119402985
ADV1:  0.0010197133301223472 0.000247935838794318 0.008372829180045227 0.045155929409008644 -0.05848698277156418
ADVB:  (21169,) (35175,) 0.6018194740582801
ADV2:  0.16566000157335709 0.3569420241618275 0.45933001589365385 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2872   0.5319   2.6090 101.1911  36.8110  32.5770
***** Episode 75452, Mean R = -9.3  Std R = 5.0  Min R = -22.6
PolicyLoss: 1.77
Policy_Entropy: 0.228
Policy_KL: 0.00584
Policy_SD: 0.533
Steps: 1.19e+04
TotalSteps

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02    0.03 |    0.61    1.90 |   -1.45   -3.14 |    1.37    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.04    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.40
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.99   -0.99 |    0.88    0.96
cs_angles |  0.0007  0.0038 |  0.0742  0.0747 | -0.9948 -0.9891 |  0.8763  0.9582
optical_flow | -0.0000 -0.0000 |  0.0202  0.0197 | -1.1326 -0.9609 |  1.0067  0.9843
v_err    | -0.0113 |  0.0599 | -0.4551 |  0.0980
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.89 |    3.88 |  -31.26 |   -8.27
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8029   0.3991   1.7075 101.1911  36.8110  32.5770
Update Cnt = 2450    ET =   1553.6   Stats:  Mean, Std, Min, Max
r_f      |   11.79   -2.57   -1.29 |  190.81  173.93  188.09 | -381.96 -392.33 -355.05 |  387.89  397.38  392.61
v_f      |   -0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.10    0.09    0.09
r_i      |   63.31  -30.69  -37.23 |  702.23  649.41  745.61 |-1331.97-1335.56-1275.40 | 1290.95 1323.54 1360.84
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.10    0.09
norm_rf  |    0.14 |    0.06 |    0.02 |    0.32
norm_vf  |    0.08 |    0.01 |    0.04 |    0.11
gs_f     |    1.72 |    9.16 |    0.01 |  159.26
thrust   |   -0.00   -0.00    0.01 |    0.67    0.67    0.67 |   -3.40   -3.42   -3.07 |    3.41    3.45    3.45
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.47 |    0.17 |    1.08 |    1.95
rewards  |  -10.28 

ADVA:  (23031,) (34909,) 0.6597439055830875
ADV1:  0.001307531977447902 -0.0001414025389641615 0.009169708727106285 0.04601516486316842 -0.10946606642066598
ADVB:  (21297,) (34909,) 0.6100719012289094
ADV2:  0.15940260831685052 0.31128929236995273 0.3967467116387438 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3132   0.1207   0.5078 101.1911  36.8110  32.5770
***** Episode 76258, Mean R = -9.1  Std R = 3.8  Min R = -18.5
PolicyLoss: 1.53
Policy_Entropy: 0.228
Policy_KL: 0.00436
Policy_SD: 0.53
Steps: 1.15e+04
TotalSteps: 2.82e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 7.73e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0018   0.0069   9.0125   2.2441   3.0464
ADVA:  (20485,) (34925,) 0.5865425912670007
ADV1:  0.0016110912942109788 0.0008546944117077843 0.008072522700973332 0.04438316151345434 -0.10946606642066598
ADVB:  (23583,) (34925,) 0.6752469577666428
ADV2:  0.2865009469697902 0.43885136906673616 0.5103299775056214 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0020   9.0125   2.2441   3.0464
ADVA:  (19899,) (35136,) 0.5663422131147541
ADV1:  0.0008828893158110912 0.0002698133969319992 0.006602134365530963 0.041142383653530845 -0.05896616462872206
ADVB:  (22393,) (35136,) 0.6373235428051002
ADV2:  0.234410500235673 0.3890689531882883 0.4675211680969558 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5217   0.2719   1.1079 101.1911  36.8110  32.5770
***** Episode 76506, Mean R = -8.9  Std R = 4.6  Min R = -23.2
PolicyLoss: 1.83
Policy_Entropy: 0.228
Policy_KL: 0.00439
Policy_SD: 0.528
Steps: 1.18e+04
TotalSteps: 2.83e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 7.72e-05


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0017   9.0125   2.2441   3.0464
ADVA:  (20161,) (35223,) 0.5723816824234165
ADV1:  0.0003238413592352943

***** Episode 76723, Mean R = -10.3  Std R = 4.8  Min R = -20.7
PolicyLoss: 2.07
Policy_Entropy: 0.227
Policy_KL: 0.00739
Policy_SD: 0.535
Steps: 1.19e+04
TotalSteps: 2.84e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.00011


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0017   9.0125   2.2441   3.0464
ADVA:  (21099,) (35624,) 0.5922692566808893
ADV1:  0.00020462847102153264 -0.0004210717271921543 0.007321421672417749 0.03888978491451289 -0.0704248417050064
ADVB:  (20057,) (35624,) 0.563019312822816
ADV2:  0.09413627631423208 0.30106321418016824 0.43716214646230295 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6069   0.2354   1.1353 101.1911  36.8110  32.5770
***** Episode 76754, Mean R = -10.2  Std R = 5.0  Min R = -21.2
PolicyLoss: 1.59
Policy_Entropy: 0.227
Policy_KL: 0.00624
Policy_SD: 0.533
Steps: 1.21e+04
TotalSteps: 2.84e+07
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.000118


Dynamics: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5789   0.2459   1.1217 101.1911  36.8110  32.5770
***** Episode 76971, Mean R = -11.8  Std R = 4.7  Min R = -21.3
PolicyLoss: 1.78
Policy_Entropy: 0.229
Policy_KL: 0.00535
Policy_SD: 0.542
Steps: 1.17e+04
TotalSteps: 2.85e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000309


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0009   9.0125   2.2441   3.0464
ADVA:  (22142,) (35564,) 0.622595883477674
ADV1:  0.0017575797307757112 0.0011398122088296437 0.008127366411355013 0.04203814676789158 -0.06299249262018813
ADVB:  (22731,) (35564,) 0.6391575750759194
ADV2:  0.25767758091170084 0.43030477858442784 0.5072479534832944 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4309   0.1762   0.8275 101.1911  36.8110  32.5770
***** Episode 77002, Mean R = -10.2  Std R = 4.7  Min R = -19.6
PolicyLoss: 2.02
Policy_Entropy: 0.228
Policy_KL: 0.00446
Policy_SD: 0.539
Steps: 1.19e+04
TotalSt

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.04    0.00 |    0.66    1.89 |   -1.37   -3.12 |    1.49    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.37
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.99 |    0.98    0.98
cs_angles |  0.0008  0.0008 |  0.0752  0.0757 | -0.9817 -0.9864 |  0.9774  0.9809
optical_flow | -0.0000  0.0002 |  0.0175  0.0205 | -1.2473 -1.0369 |  0.9340  1.1530
v_err    | -0.0114 |  0.0601 | -0.4547 |  0.1076
landing_rewards |    9.29 |    2.57 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -14.95 |    3.95 |  -27.19 |   -7.89
steps    |     380 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5773   0.1983   1.0499 101.1911  36.8110  32.5770
Update Cnt = 2500    ET =   1553.5   Stats:  Mean, Std, Min, Max
r_f      |  -10.03    3.69  -17.78 |  189.94  170.40  195.01 | -387.28 -359.58 -391.47 |  391.32  372.71  395.11
v_f      |    0.00   -0.01    0.00 |    0.05    0.04    0.05 |   -0.11   -0.09   -0.11 |    0.11    0.08    0.10
r_i      |  -50.88   56.51  -55.28 |  710.41  658.35  725.96 |-1326.66-1325.08-1247.13 | 1268.85 1329.20 1290.40
v_i      |    0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.15 |    0.06 |    0.03 |    0.37
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.14 |    2.11 |    0.00 |   26.54
thrust   |    0.00   -0.00    0.01 |    0.67    0.67    0.66 |   -3.30   -3.30   -3.40 |    3.43    3.10    3.43
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.46 |    0.19 |    1.05 |    2.18
rewards  |  -10.07 

ADVA:  (20127,) (35349,) 0.5693796146991428
ADV1:  0.00014824234039978804 -0.00020085117361840317 0.0057144433169787415 0.04940595634694622 -0.09042783355878242
ADVB:  (19789,) (35349,) 0.5598178166284761
ADV2:  0.10733624489844497 0.3270481959280503 0.469655406527904 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7691   0.3625   1.2809 101.1911  36.8110  32.5770
***** Episode 77808, Mean R = -9.2  Std R = 4.3  Min R = -22.2
PolicyLoss: 1.74
Policy_Entropy: 0.228
Policy_KL: 0.0091
Policy_SD: 0.524
Steps: 1.17e+04
TotalSteps: 2.88e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 5.47e-05


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0006   0.0026   9.0125   2.2441   3.0464
ADVA:  (20685,) (35292,) 0.5861101666099966
ADV1:  0.0006063157809745364 0.00025035201426258587 0.005870968283332186 0.04940595634694622 -0.09042783355878242
ADVB:  (20658

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0019   0.0068   9.0125   2.2441   3.0464
ADVA:  (21259,) (35047,) 0.6065854424059121
ADV1:  0.0001379410489341839 -0.0010466030040256298 0.008205835142919653 0.0377858687384664 -0.0669846746060531
ADVB:  (19018,) (35047,) 0.542642736896168
ADV2:  0.05481179351232145 0.23561043273969004 0.3461051017041116 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3345   0.1137   0.5547 101.1911  36.8110  32.5770
***** Episode 78056, Mean R = -11.1  Std R = 7.5  Min R = -31.0
PolicyLoss: 1.29
Policy_Entropy: 0.229
Policy_KL: 0.00489
Policy_SD: 0.531
Steps: 1.16e+04
TotalSteps: 2.89e+07
VF_0_ExplainedVarNew: 0.958
VF_0_ExplainedVarOld: 0.956
VF_0_Loss : 0.00012


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0048   0.0029   0.0103   9.0125   2.2441   3.0464
ADVA:  (21299,) (35175,) 0.6055152807391614
ADV1:  0.0015562711693572883 0.00043498450628982747 0.009049476373275168 0.045539563328374 -0.06433412971844027
ADVB:  (

***** Episode 78273, Mean R = -9.6  Std R = 4.7  Min R = -19.6
PolicyLoss: 2.33
Policy_Entropy: 0.229
Policy_KL: 0.00762
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 2.9e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.000193


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0016   9.0125   2.2441   3.0464
ADVA:  (19761,) (35044,) 0.5638911083209679
ADV1:  0.0 -0.0005346398838371553 0.006125160869319263 0.03988359340260536 -0.08059784145580368
ADVB:  (17276,) (35044,) 0.4929802533957311
ADV2:  0.0 0.2908576510486403 0.465593502141548 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6765   0.2572   1.1967 101.1911  36.8110  32.5770
***** Episode 78304, Mean R = -8.7  Std R = 4.0  Min R = -17.4
PolicyLoss: 1.75
Policy_Entropy: 0.228
Policy_KL: 0.00789
Policy_SD: 0.532
Steps: 1.17e+04
TotalSteps: 2.9e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.000308


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd : 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3372   0.1235   0.6147 101.1911  36.8110  32.5770
***** Episode 78521, Mean R = -9.4  Std R = 4.6  Min R = -20.8
PolicyLoss: 1.75
Policy_Entropy: 0.229
Policy_KL: 0.00735
Policy_SD: 0.533
Steps: 1.17e+04
TotalSteps: 2.91e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 5.83e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0010   9.0125   2.2441   3.0464
ADVA:  (20498,) (35151,) 0.583141304657051
ADV1:  0.00015261986665378747 -0.0006339596056045886 0.007482115678692834 0.051050705085616266 -0.05579051654904327
ADVB:  (20249,) (35151,) 0.576057580154192
ADV2:  0.10311538856653228 0.29042794899067126 0.41052383552404653 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3896   0.1775   0.7580 101.1911  36.8110  32.5770
***** Episode 78552, Mean R = -9.6  Std R = 5.5  Min R = -21.7
PolicyLoss: 1.49
Policy_Entropy: 0.229
Policy_KL: 0.00872
Policy_SD: 0.533
Steps: 1.16e+04
TotalS

attitude |    0.04    0.03   -0.03 |    1.23    0.71    1.88 |   -3.14   -1.56   -3.14 |    3.14    1.56    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.04   -0.01 |    0.70    1.89 |   -1.49   -3.13 |    1.38    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.02 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.47
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.97 |    0.99    0.99
cs_angles |  0.0027  0.0025 |  0.0765  0.0791 | -0.9917 -0.9741 |  0.9922  0.9902
optical_flow | -0.0000 -0.0000 |  0.0192  0.0211 | -0.8822 -1.0159 |  1.0750  1.1710
v_err    | -0.0113 |  0.0602 | -0.4518 |  0.1105
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (22293,) (35333,) 0.6309399145274955
ADV1:  0.00031714607562014635 -0.00041631320975348284 0.007536879067337674 0.052546520398428165 -0.08740607109743848
ADVB:  (19401,) (35333,) 0.549090085755526
ADV2:  0.06999707815035949 0.27642291302085925 0.4072390738159077 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4812   0.7056   3.1429 101.1911  36.8110  32.5770
Update Cnt = 2550    ET =   1386.4   Stats:  Mean, Std, Min, Max
r_f      |    5.32    7.04    3.90 |  183.93  165.73  205.10 | -389.28 -355.82 -378.33 |  393.65  377.90  393.69
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.10    0.10    0.11
r_i      |  -16.63   22.35  -12.40 |  664.01  653.91  787.45 |-1358.00-1269.02-1346.37 | 1321.30 1299.76 1328.61
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.01 |    0.33
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0014   9.0125   2.2441   3.0464
ADVA:  (22164,) (35077,) 0.6318670353793083
ADV1:  0.0004987577407093531 -0.0005734250416506572 0.007902789560188308 0.0432159300264805 -0.07537747125714056
ADVB:  (20430,) (35077,) 0.5824329332611112
ADV2:  0.11811544635356963 0.29059709628892416 0.3937402907789044 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6136   0.2498   1.1667 101.1911  36.8110  32.5770
***** Episode 79358, Mean R = -8.9  Std R = 4.5  Min R = -20.4
PolicyLoss: 1.48
Policy_Entropy: 0.229
Policy_KL: 0.00853
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 2.94e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.000133


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0019   0.0068   9.0125   2.2441   3.0464
ADVA:  (18780,) (35001,) 0.5365560983971887
ADV1:  0.001823786612967609 0.0010658884904177573 0.007233325351404827 0.0432159300264805 -0.05660544031249343
ADVB:  

***** Episode 79575, Mean R = -8.7  Std R = 4.0  Min R = -16.4
PolicyLoss: 1.82
Policy_Entropy: 0.228
Policy_KL: 0.0107
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 2.95e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.000117


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0005   9.0125   2.2441   3.0464
ADVA:  (19286,) (35356,) 0.5454802579477317
ADV1:  0.00092956103982628 0.0004455852470007129 0.006559267626826166 0.04789481262734632 -0.06102414987138493
ADVB:  (22745,) (35356,) 0.6433137232718633
ADV2:  0.24799600439315772 0.409834950408306 0.5002971091838553 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.8099   1.0098   4.0486 101.1911  36.8110  32.5770
***** Episode 79606, Mean R = -9.2  Std R = 4.1  Min R = -16.9
PolicyLoss: 1.9
Policy_Entropy: 0.229
Policy_KL: 0.00809
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 2.95e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.000141


ValFun  Gradients

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0431   0.5157   2.1518 101.1911  36.8110  32.5770
***** Episode 79823, Mean R = -10.4  Std R = 5.8  Min R = -28.1
PolicyLoss: 1.73
Policy_Entropy: 0.229
Policy_KL: 0.00727
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 2.96e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.000319


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0012   9.0125   2.2441   3.0464
ADVA:  (20199,) (34879,) 0.5791163737492474
ADV1:  0.0017583700664623473 0.0009016353719508164 0.007471357009631458 0.03639858517546918 -0.060471122855415754
ADVB:  (24049,) (34879,) 0.6894979787264544
ADV2:  0.34882241441747885 0.463886101957904 0.5033633669901422 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.6090   3.0924  11.7696 101.1911  36.8110  32.5770
***** Episode 79854, Mean R = -9.3  Std R = 5.2  Min R = -26.1
PolicyLoss: 2.01
Policy_Entropy: 0.23
Policy_KL: 0.00615
Policy_SD: 0.534
Steps: 1.16e+04
TotalStep

ADVA:  (21320,) (34901,) 0.6108707486891493
ADV1:  0.001207735437108481 0.0005067508682362608 0.007408250336897859 0.07283213454224519 -0.07295492140939186
ADVB:  (22578,) (34901,) 0.6469155611587061
ADV2:  0.22819312058887645 0.3724397148460288 0.46696982152871214 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7524   0.3297   1.5322 101.1911  36.8110  32.5770
***** Episode 80071, Mean R = -8.5  Std R = 5.0  Min R = -25.8
PolicyLoss: 1.71
Policy_Entropy: 0.23
Policy_KL: 0.00799
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 2.97e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 6.79e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0005   0.0016   9.0125   2.2441   3.0464
ADVA:  (20808,) (34810,) 0.597759264579144
ADV1:  0.0010479741874219921 0.0005693269937639225 0.0069092640157080104 0.07283213454224519 -0.058873726694152105
ADVB:  (21809,) (34810,) 0.626515369146797
ADV2:  0.22117678167288451 0.38927700814901084 0.5034961870092928 3

attitude |    0.02    0.02   -0.01 |    1.28    0.70    1.87 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02   -0.05 |    0.69    1.89 |   -1.40   -3.14 |    1.50    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.02 |    0.03    0.03    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.38
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.97   -0.99 |    1.00    0.99
cs_angles |  0.0008  0.0003 |  0.0752  0.0761 | -0.9715 -0.9900 |  0.9960  0.9948
optical_flow | -0.0001 -0.0000 |  0.0206  0.0211 | -1.0098 -1.1528 |  1.1054  1.1423
v_err    | -0.0108 |  0.0601 | -0.4533 |  0.0982
landing_rewards |    9.77 |    1.49 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (20287,) (34807,) 0.5828425316746632
ADV1:  0.0012056435266567375 0.00038361710833827357 0.007720847157451257 0.04287990840422323 -0.06768180285885067
ADVB:  (22272,) (34807,) 0.6398712902577068
ADV2:  0.2108775637503036 0.36534218338252034 0.4505851821496562 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.8280   2.0653   8.6819 130.3035  38.5834  37.2821
Update Cnt = 2600    ET =   1337.3   Stats:  Mean, Std, Min, Max
r_f      |    6.22  -15.41    7.38 |  188.95  182.70  199.29 | -392.06 -390.48 -382.72 |  379.74  372.97  381.23
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.11   -0.10   -0.11 |    0.10    0.10    0.11
r_i      |    1.53  -16.68    9.30 |  693.67  681.46  743.46 |-1290.65-1385.42-1265.67 | 1238.14 1267.18 1224.94
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.02 |    0.34
norm_vf  |    0.08 |    0.01 |    0.04 |    0.11
gs_f   

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0019   0.0064   9.0125   2.2441   3.0464
ADVA:  (22287,) (35311,) 0.6311630936535357
ADV1:  0.0014135850345462966 0.00023437598874452215 0.00849527168595282 0.04524220670810175 -0.06458792904425448
ADVB:  (21217,) (35311,) 0.6008609215258701
ADV2:  0.15088068467016827 0.31383639758198306 0.4096033545438425 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4604   0.2021   0.9572 130.3035  38.5834  37.2821
***** Episode 80908, Mean R = -8.9  Std R = 4.8  Min R = -23.8
PolicyLoss: 1.56
Policy_Entropy: 0.23
Policy_KL: 0.00627
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 3e+07
VF_0_ExplainedVarNew: 0.945
VF_0_ExplainedVarOld: 0.939
VF_0_Loss : 0.000509


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0024   0.0086   9.0125   2.2441   3.0464
ADVA:  (22465,) (35250,) 0.6373049645390071
ADV1:  0.002204680323302545 0.0010434175259050665 0.008917222487858414 0.04005913242620801 -0.06458792904425448
ADVB:  (23

***** Episode 81125, Mean R = -8.7  Std R = 4.1  Min R = -21.2
PolicyLoss: 1.54
Policy_Entropy: 0.229
Policy_KL: 0.00739
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 3.01e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00016


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0012   9.0125   2.2441   3.0464
ADVA:  (19336,) (35001,) 0.5524413588183195
ADV1:  0.00109489051772459 0.0007748178154103059 0.0062077510225119915 0.08559029489098469 -0.07740682172079294
ADVB:  (23124,) (35001,) 0.6606668380903403
ADV2:  0.2734196865745754 0.435150864196562 0.5235727079367746 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6545   0.2790   1.1614 130.3035  38.5834  37.2821
***** Episode 81156, Mean R = -9.6  Std R = 4.4  Min R = -18.9
PolicyLoss: 1.96
Policy_Entropy: 0.229
Policy_KL: 0.00575
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 3.01e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.000239


ValFun  Gradients

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7179   0.2140   1.1809 130.3035  38.5834  37.2821
***** Episode 81373, Mean R = -8.8  Std R = 3.8  Min R = -18.3
PolicyLoss: 1.6
Policy_Entropy: 0.23
Policy_KL: 0.00679
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 3.01e+07
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.000178


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0014   9.0125   2.2441   3.0464
ADVA:  (22456,) (35157,) 0.6387348181016583
ADV1:  0.0008821228965689033 -0.0003021151168881221 0.00819678860231891 0.031961171402311556 -0.06189810563204051
ADVB:  (21222,) (35157,) 0.603635122450721
ADV2:  0.16279058319598413 0.3070644894771973 0.3852915021354809 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5471   0.1952   0.9343 130.3035  38.5834  37.2821
***** Episode 81404, Mean R = -11.1  Std R = 5.7  Min R = -25.8
PolicyLoss: 1.51
Policy_Entropy: 0.23
Policy_KL: 0.00608
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps:

ADVA:  (20471,) (35257,) 0.5806222877726409
ADV1:  0.0 -0.0012871661319720361 0.008546690439655617 0.06503081861924115 -0.07555975308326168
ADVB:  (17798,) (35257,) 0.5048075559463369
ADV2:  0.006125395573067072 0.24633237767394797 0.3938259668246431 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4991   0.1791   0.9148 130.3035  38.5834  37.2821
***** Episode 81621, Mean R = -10.9  Std R = 6.3  Min R = -25.7
PolicyLoss: 1.44
Policy_Entropy: 0.23
Policy_KL: 0.00684
Policy_SD: 0.537
Steps: 1.19e+04
TotalSteps: 3.02e+07
VF_0_ExplainedVarNew: 0.957
VF_0_ExplainedVarOld: 0.954
VF_0_Loss : 0.000242


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0017   0.0073   9.0125   2.2441   3.0464
ADVA:  (20096,) (35608,) 0.5643675578521681
ADV1:  0.0008379357023461081 0.00013999604545142673 0.008310289351471191 0.06503081861924115 -0.07361861472013648
ADVB:  (21934,) (35608,) 0.6159851718714896
ADV2:  0.1851139507277963 0.3818307070872356 0.48691012868506534 3.0 0.0
Policy

attitude |    0.01   -0.00    0.09 |    1.17    0.68    1.85 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.01    0.05 |    0.67    1.85 |   -1.51   -3.14 |    1.42    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.53
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.99 |    0.94    0.99
cs_angles |  0.0021  0.0021 |  0.0761  0.0778 | -0.9829 -0.9930 |  0.9403  0.9937
optical_flow | -0.0000 -0.0001 |  0.0193  0.0215 | -1.0847 -1.4935 |  1.0225  1.0876
v_err    | -0.0107 |  0.0596 | -0.4558 |  0.1019
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (20002,) (35175,) 0.5686425017768302
ADV1:  0.0009704515727144251 0.0004411087073010664 0.007002473854603644 0.04595576450454891 -0.08879871102318637
ADVB:  (21954,) (35175,) 0.624136460554371
ADV2:  0.22188497911932084 0.39043899649191405 0.4894240032979152 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2452   0.0742   0.3969 130.3035  38.5834  37.2821
Update Cnt = 2650    ET =   1351.2   Stats:  Mean, Std, Min, Max
r_f      |   16.04    4.77   -3.96 |  183.04  163.51  205.72 | -382.98 -362.64 -383.10 |  385.68  391.24  375.55
v_f      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.11    0.09    0.11
r_i      |   49.88  -27.95  -12.25 |  652.71  658.34  783.62 |-1294.68-1250.36-1251.66 | 1325.88 1232.88 1329.18
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.39
norm_vf  |    0.08 |    0.01 |    0.03 |    0.11
gs_f    

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0014   9.0125   2.2441   3.0464
ADVA:  (20783,) (34745,) 0.5981580083465247
ADV1:  0.000567461645365145 -0.0005186492042496012 0.008511476157958706 0.0492449052701332 -0.08013113789545218
ADVB:  (20571,) (34745,) 0.5920564109943877
ADV2:  0.14624084368252957 0.31884766705478224 0.4152903898537325 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6828   0.3191   1.4202 130.3035  38.5834  37.2821
***** Episode 82458, Mean R = -10.8  Std R = 4.8  Min R = -24.3
PolicyLoss: 1.6
Policy_Entropy: 0.231
Policy_KL: 0.0129
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 3.06e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.00018


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0019   0.0066   9.0125   2.2441   3.0464
ADVA:  (21204,) (34806,) 0.6092053094294088
ADV1:  0.0016674862775173556 0.0004792447271184815 0.008109664939608813 0.0492449052701332 -0.08013113789545218
ADVB:  (2

***** Episode 82675, Mean R = -9.3  Std R = 5.1  Min R = -26.4
PolicyLoss: 1.68
Policy_Entropy: 0.231
Policy_KL: 0.00923
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 3.06e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000111


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0036   9.0125   2.2441   3.0464
ADVA:  (22067,) (35308,) 0.6249858389033647
ADV1:  0.0005894111783996143 -0.00018143466601689434 0.006870064462969892 0.04218957091312392 -0.07509642309412412
ADVB:  (19711,) (35308,) 0.5582587515577206
ADV2:  0.08815700351125112 0.28192470057221497 0.4023883547735775 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2533   0.0834   0.4638 130.3035  38.5834  37.2821
***** Episode 82706, Mean R = -9.2  Std R = 4.7  Min R = -20.3
PolicyLoss: 1.49
Policy_Entropy: 0.232
Policy_KL: 0.00871
Policy_SD: 0.532
Steps: 1.19e+04
TotalSteps: 3.06e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 5.7e-05


ValFun  Gr

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2373   0.6435   2.5187 130.3035  38.5834  37.2821
***** Episode 82923, Mean R = -8.6  Std R = 4.4  Min R = -28.9
PolicyLoss: 1.76
Policy_Entropy: 0.232
Policy_KL: 0.00901
Policy_SD: 0.545
Steps: 1.18e+04
TotalSteps: 3.07e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 5.73e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0008   0.0031   9.0125   2.2441   3.0464
ADVA:  (18679,) (35175,) 0.5310305614783227
ADV1:  0.0004949999484391253 0.0003067909085691173 0.005697925978100742 0.04154145322434136 -0.05803390952680224
ADVB:  (21854,) (35175,) 0.6212935323383084
ADV2:  0.2059148427217779 0.39814150004539867 0.513358814806036 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   9.6561   5.9378  21.9549 130.3035  38.5834  37.2821
***** Episode 82954, Mean R = -8.9  Std R = 5.0  Min R = -20.2
PolicyLoss: 1.89
Policy_Entropy: 0.232
Policy_KL: 0.0095
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 

ADVA:  (22649,) (35056,) 0.6460805568233683
ADV1:  0.001215661222398909 0.00016376440872733908 0.007982384795176566 0.05588879918812212 -0.0694268858351505
ADVB:  (22431,) (35056,) 0.6398619351894113
ADV2:  0.1914733962718098 0.33275555678829705 0.4093599639484705 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2949   0.1163   0.5734 130.3035  38.5834  37.2821
***** Episode 83171, Mean R = -9.2  Std R = 5.4  Min R = -25.8
PolicyLoss: 1.54
Policy_Entropy: 0.231
Policy_KL: 0.0071
Policy_SD: 0.538
Steps: 1.17e+04
TotalSteps: 3.08e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 5.96e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0003   0.0010   9.0125   2.2441   3.0464
ADVA:  (22499,) (34923,) 0.6442459124359304
ADV1:  0.001612112877943367 0.00034898399557416833 0.00850102841451053 0.05588879918812212 -0.06318967010920146
ADVB:  (23391,) (34923,) 0.6697878189159007
ADV2:  0.24328498007823954 0.3559886497208368 0.411233735216107 3.0 

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.04   -0.06 |    0.69    1.87 |   -1.48   -3.14 |    1.52    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.50
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.95   -0.99 |    0.92    0.98
cs_angles |  0.0011  0.0037 |  0.0732  0.0770 | -0.9544 -0.9917 |  0.9211  0.9757
optical_flow | -0.0001 -0.0001 |  0.0189  0.0210 | -0.9939 -1.0080 |  0.9926  1.0298
v_err    | -0.0107 |  0.0603 | -0.4534 |  0.0986
landing_rewards |    9.65 |    1.85 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -14.74 |    4.49 |  -44.65 |   -8.24
steps    |     376 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6807   0.3550   1.3083 130.3035  38.5834  37.2821
Update Cnt = 2700    ET =   1384.1   Stats:  Mean, Std, Min, Max
r_f      |   -6.35    7.62    2.32 |  188.74  181.20  191.35 | -382.98 -381.18 -390.76 |  380.96  395.17  375.23
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.09    0.11    0.12
r_i      |  -18.65   37.32   12.09 |  685.50  681.96  741.25 |-1348.34-1312.26-1321.25 | 1285.42 1286.13 1265.74
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.15 |    0.06 |    0.04 |    0.34
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.27 |    1.99 |    0.00 |   19.30
thrust   |    0.00    0.00    0.00 |    0.66    0.68    0.67 |   -3.40   -3.30   -3.29 |    3.39    3.43    3.43
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.48 |    0.19 |    1.03 |    2.22
rewards  |   -9.82 

ADVA:  (21072,) (35294,) 0.5970419901399672
ADV1:  0.0002443518853068004 -0.0005903344213601463 0.007455826826055916 0.036764121728266985 -0.07360946881379893
ADVB:  (19691,) (35294,) 0.5579135263784213
ADV2:  0.09183600974745293 0.28408976764868327 0.40830616152889104 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5420   0.2008   0.9526 130.3035  38.5834  37.2821
***** Episode 84008, Mean R = -9.7  Std R = 5.7  Min R = -24.8
PolicyLoss: 1.5
Policy_Entropy: 0.232
Policy_KL: 0.0107
Policy_SD: 0.527
Steps: 1.18e+04
TotalSteps: 3.11e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.000174


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0020   0.0070   9.0125   2.2441   3.0464
ADVA:  (20846,) (35324,) 0.5901370173253312
ADV1:  0.0012739335271321076 0.0004241088450092388 0.007286857543074255 0.03781654409309404 -0.07360946881379893
ADVB:  (22332,) (35324,) 0.6322047333257842
ADV2:  0.23294042877605758 0.3867872258442615 0.470190810283334

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0017   0.0058   9.0125   2.2441   3.0464
ADVA:  (21245,) (35122,) 0.6048915209839987
ADV1:  0.0002352908314411442 -0.0003292605300304153 0.0070648595549743665 0.054642545857513214 -0.08751874255902398
ADVB:  (19024,) (35122,) 0.5416548032572177
ADV2:  0.061916928214403366 0.29075048525070896 0.4365481581061921 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3173   0.1231   0.6031 130.3035  38.5834  37.2821
***** Episode 84256, Mean R = -9.7  Std R = 5.4  Min R = -25.5
PolicyLoss: 1.58
Policy_Entropy: 0.232
Policy_KL: 0.00718
Policy_SD: 0.528
Steps: 1.19e+04
TotalSteps: 3.12e+07
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.000122


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0018   0.0061   9.0125   2.2441   3.0464
ADVA:  (19473,) (34985,) 0.556609975703873
ADV1:  0.0011148466167661235 0.0006590510027854335 0.00674613366927145 0.054642545857513214 -0.0672315773590268
ADVB

***** Episode 84473, Mean R = -9.6  Std R = 4.3  Min R = -19.5
PolicyLoss: 1.73
Policy_Entropy: 0.231
Policy_KL: 0.02
Policy_SD: 0.53
Steps: 1.18e+04
TotalSteps: 3.13e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.000234


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0021   0.0073   9.0125   2.2441   3.0464
ADVA:  (19235,) (35498,) 0.5418615133246943
ADV1:  0.0 -0.00017360540725881526 0.006698479365512223 0.04433718716980184 -0.08500102427456002
ADVB:  (19830,) (35498,) 0.5586230210152685
ADV2:  0.0981024166893994 0.35985332483788296 0.5145986678778423 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.9607   2.4424  11.1352 130.3035  38.5834  37.2821
***** Episode 84504, Mean R = -11.1  Std R = 6.2  Min R = -27.1
PolicyLoss: 1.9
Policy_Entropy: 0.231
Policy_KL: 0.00815
Policy_SD: 0.538
Steps: 1.19e+04
TotalSteps: 3.13e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.000193


ValFun  Gradients: u/sd/Max/C Ma

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3628   0.5378   3.0349 130.3035  38.5834  37.2821
***** Episode 84721, Mean R = -8.5  Std R = 3.9  Min R = -20.3
PolicyLoss: 2.19
Policy_Entropy: 0.232
Policy_KL: 0.00719
Policy_SD: 0.525
Steps: 1.2e+04
TotalSteps: 3.14e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 7.75e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0011   0.0044   9.0125   2.2441   3.0464
ADVA:  (21759,) (35334,) 0.6158091356766854
ADV1:  0.0 -0.0007417685321559913 0.0064141782053335405 0.04402109075804789 -0.05244652973054681
ADVB:  (16204,) (35334,) 0.45859512084677645
ADV2:  0.0 0.23930780178668504 0.4164461200621316 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3796   0.1294   0.6701 130.3035  38.5834  37.2821
***** Episode 84752, Mean R = -10.3  Std R = 5.8  Min R = -23.9
PolicyLoss: 1.53
Policy_Entropy: 0.232
Policy_KL: 0.00957
Policy_SD: 0.53
Steps: 1.17e+04
TotalSteps: 3.14e+07
VF_0_ExplainedVarNe

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.10    0.06 |    0.67    1.81 |   -1.47   -3.14 |    1.55    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.44
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.93   -1.00 |    0.98    0.99
cs_angles |  0.0025  0.0028 |  0.0724  0.0765 | -0.9255 -0.9996 |  0.9836  0.9852
optical_flow |  0.0000 -0.0000 |  0.0188  0.0208 | -0.9588 -0.9731 |  1.2420  1.0465
v_err    | -0.0112 |  0.0600 | -0.4521 |  0.1036
landing_rewards |    9.77 |    1.49 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -14.81 |    4.06 |  -28.44 |   -8.15
steps    |     380 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5753   0.1953   0.9486 130.3035  38.5834  37.2821
Update Cnt = 2750    ET =   1795.9   Stats:  Mean, Std, Min, Max
r_f      |   -4.62   11.63  -31.71 |  176.31  165.44  207.02 | -377.07 -386.22 -389.07 |  390.77  386.25  374.86
v_f      |    0.00   -0.00    0.01 |    0.04    0.04    0.05 |   -0.09   -0.11   -0.10 |    0.09    0.09    0.09
r_i      |  -41.72   29.02 -104.99 |  654.04  641.33  796.93 |-1287.13-1272.32-1345.69 | 1293.82 1275.64 1279.78
v_i      |    0.00   -0.00    0.01 |    0.04    0.04    0.05 |   -0.10   -0.08   -0.09 |    0.09    0.10    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.36
norm_vf  |    0.07 |    0.01 |    0.04 |    0.12
gs_f     |    1.43 |    2.14 |    0.01 |   22.11
thrust   |    0.00    0.00    0.00 |    0.66    0.67    0.65 |   -3.44   -3.33   -3.43 |    3.36    3.42    3.38
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.46 |    0.20 |    1.01 |    2.15
rewards  |   -9.41 

ADVA:  (18142,) (35056,) 0.5175148334094021
ADV1:  0.000606177335910417 0.0005759954027124443 0.005260504647759584 0.04184553891439119 -0.07108682120806548
ADVB:  (22274,) (35056,) 0.6353833865814696
ADV2:  0.26231228932360007 0.4754125196849718 0.5923988579950608 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4033   0.1284   0.6728 130.3035  38.5834  37.2821
***** Episode 85558, Mean R = -7.3  Std R = 3.7  Min R = -17.2
PolicyLoss: 2.2
Policy_Entropy: 0.233
Policy_KL: 0.00497
Policy_SD: 0.524
Steps: 1.16e+04
TotalSteps: 3.17e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 9.93e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0018   0.0063   9.0125   2.2441   3.0464
ADVA:  (22086,) (35202,) 0.6274075336628601
ADV1:  0.0 -0.0010106900558297799 0.006664796675306805 0.04184553891439119 -0.07108682120806548
ADVB:  (15720,) (35202,) 0.4465655360490881
ADV2:  0.0 0.19267755011984872 0.34338405587616927 3.0 0.0
Policy  Gradients: u/sd/M

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0016   0.0062   9.0125   2.2441   3.0464
ADVA:  (20629,) (35167,) 0.5866010748713283
ADV1:  0.0 -0.0006760943931014388 0.008311258355276997 0.10487654085795234 -0.07242799031667052
ADVB:  (18013,) (35167,) 0.5122131543776837
ADV2:  0.022103928477194244 0.30907333664342146 0.4593528033674101 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7281   0.3593   1.3966 130.3035  38.5834  37.2821
***** Episode 85806, Mean R = -10.0  Std R = 4.7  Min R = -24.3
PolicyLoss: 1.77
Policy_Entropy: 0.233
Policy_KL: 0.00755
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 3.18e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.000381


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0017   0.0065   9.0125   2.2441   3.0464
ADVA:  (18563,) (35273,) 0.5262665494854422
ADV1:  0.00041960805170240464 7.57810805

***** Episode 86023, Mean R = -10.1  Std R = 5.3  Min R = -25.1
PolicyLoss: 1.56
Policy_Entropy: 0.233
Policy_KL: 0.00589
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 3.19e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 9.23e-05


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0047   0.0028   0.0102   9.0125   2.2441   3.0464
ADVA:  (19773,) (35198,) 0.561764873004148
ADV1:  0.0015286586873003733 0.00089525704830818 0.007471762545270304 0.0514461340052102 -0.06061336713012244
ADVB:  (24289,) (35198,) 0.6900676174782658
ADV2:  0.343470264421774 0.4843002724625597 0.5334295009823917 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6930   0.2805   1.2867 130.3035  38.5834  37.2821
***** Episode 86054, Mean R = -6.8  Std R = 2.9  Min R = -14.0
PolicyLoss: 2.08
Policy_Entropy: 0.233
Policy_KL: 0.00892
Policy_SD: 0.525
Steps: 1.17e+04
TotalSteps: 3.19e+07
VF_

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4423   0.1284   0.6651 130.3035  38.5834  37.2821
***** Episode 86271, Mean R = -9.7  Std R = 4.0  Min R = -20.3
PolicyLoss: 1.79
Policy_Entropy: 0.233
Policy_KL: 0.00652
Policy_SD: 0.54
Steps: 1.16e+04
TotalSteps: 3.2e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 8.01e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0015   0.0052   9.0125   2.2441   3.0464
ADVA:  (20994,) (35192,) 0.5965560354626052
ADV1:  8.824852682130097e-05 -0.0006930168598833157 0.007196685483236352 0.05284666364496887 -0.06554913005097485
ADVB:  (18702,) (35192,) 0.5314275971811775
ADV2:  0.04304604591571032 0.2525589495057136 0.3868927915531774 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4864   0.1478   0.7852 130.3035  38.5834  37.2821
***** Episode 86302, Mean R = -10.3  Std R = 5.4  Min R = -25.9
PolicyLoss: 1.39
Policy_Entropy: 0.233
Policy_KL: 0.00757
Policy_SD: 0.533
Steps: 1.18e+04
TotalStep

seeker_angles |    0.00    0.00 |    0.08    0.07 |   -0.99   -1.00 |    1.00    0.97
cs_angles |  0.0029  0.0030 |  0.0760  0.0744 | -0.9940 -0.9970 |  0.9987  0.9686
optical_flow |  0.0001 -0.0000 |  0.0181  0.0193 | -0.9860 -1.3092 |  1.0097  1.0800
v_err    | -0.0117 |  0.0608 | -0.4518 |  0.1024
landing_rewards |    9.61 |    1.93 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -14.99 |    3.89 |  -28.42 |   -7.79
steps    |     376 |      20 |     333 |     418
***** Episode 86550, Mean R = -8.8  Std R = 2.9  Min R = -15.0
PolicyLoss: 2.35
Policy_Entropy: 0.234
Policy_KL: 0.00624
Policy_SD: 0.527
Steps: 1.19e+04
TotalSteps: 3.21e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.000346


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0012   0.0044   9.0125   2.2441   3.0464
ADVA:  (19080,) (35111,) 0.54341

attitude |   -0.00    0.05    0.09 |    1.14    0.67    1.87 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.04    0.12 |    0.67    1.88 |   -1.37   -3.09 |    1.48    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.04    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.71
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.94   -0.97 |    0.97    0.98
cs_angles |  0.0028  0.0036 |  0.0729  0.0788 | -0.9381 -0.9660 |  0.9747  0.9842
optical_flow |  0.0001 -0.0001 |  0.0185  0.0211 | -0.9429 -1.2280 |  1.0839  1.0216
v_err    | -0.0117 |  0.0607 | -0.4537 |  0.0973
landing_rewards |    9.65 |    1.85 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9139   0.4282   1.8330 130.3035  38.5834  37.2821
Update Cnt = 2810    ET =   1912.5   Stats:  Mean, Std, Min, Max
r_f      |  -19.20   18.36    2.22 |  188.45  157.63  208.22 | -397.66 -386.68 -393.24 |  392.36  392.68  380.75
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.11    0.09    0.10
r_i      |  -50.06   84.95  -23.64 |  677.50  630.96  785.08 |-1327.02-1358.54-1292.90 | 1268.22 1303.69 1263.43
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.08   -0.09   -0.09 |    0.10    0.09    0.09
norm_rf  |    0.13 |    0.06 |    0.02 |    0.40
norm_vf  |    0.07 |    0.01 |    0.04 |    0.12
gs_f     |    1.29 |    1.68 |    0.00 |   15.14
thrust   |    0.00   -0.01   -0.00 |    0.66    0.66    0.66 |   -3.36   -3.40   -3.35 |    3.46    3.37    3.39
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.47 |    0.17 |    1.04 |    2.31
rewards  |   -8.96 

ADVA:  (20236,) (35337,) 0.5726575544047315
ADV1:  0.0012775973789987778 0.00048260379400298013 0.007340357147684477 0.05426858853916122 -0.07264531452208844
ADVB:  (23188,) (35337,) 0.6561960551263548
ADV2:  0.27591222578484137 0.4254839467193324 0.49977934012987324 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4412   0.1473   0.7697 130.3035  38.5834  37.2821
***** Episode 87418, Mean R = -10.6  Std R = 6.1  Min R = -26.7
PolicyLoss: 1.92
Policy_Entropy: 0.233
Policy_KL: 0.00988
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 3.24e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 5.68e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0018   0.0073   9.0125   2.2441   3.0464
ADVA:  (20807,) (35320,) 0.5890996602491506
ADV1:  0.000448775662247782 -0.0002621675999082385 0.007989459017223123 0.05556919014923037 -0.06814445564466115
ADVB:  (21380,) (35320,) 0.6053227633069083
ADV2:  0.18569628480930628 0.3703622593900451 0.475435778712098

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0020   9.0125   2.2441   3.0464
ADVA:  (18639,) (35204,) 0.5294568799000113
ADV1:  0.0009728683311648889 0.0007815209445938349 0.006468253994058779 0.04159407904386048 -0.08239797525731873
ADVB:  (22709,) (35204,) 0.6450687421883877
ADV2:  0.3158255488863195 0.5223874056279366 0.6157463562596488 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.3417   5.4300  16.5038 130.3035  38.5834  37.2821
***** Episode 87666, Mean R = -9.5  Std R = 4.4  Min R = -20.1
PolicyLoss: 2.39
Policy_Entropy: 0.234
Policy_KL: 0.0117
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 3.25e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 6.1e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0010   9.0125   2.2441   3.0464
ADVA:  (19724,) (35182,) 0.5606275936558467
ADV1:  0.0 -0.0004576546583052585 0.005478210920335657 0.04159407904386048 -0.08239797525731873
ADVB:  (17482,) (35182,) 0

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0009   0.0031   9.0125   2.2441   3.0464
ADVA:  (20729,) (35392,) 0.5856973327305606
ADV1:  0.0 -0.0004662339201787945 0.0051377231480120205 0.03453808978432227 -0.07328903023171363
ADVB:  (16645,) (35392,) 0.47030402350813744
ADV2:  0.0 0.2737657329416304 0.46164145912104465 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  12.2319  11.2691  55.9272 130.3035  38.5834  37.2821
***** Episode 87914, Mean R = -8.2  Std R = 4.0  Min R = -20.2
PolicyLoss: 1.71
Policy_Entropy: 0.233
Policy_KL: 0.0297
Policy_SD: 0.528
Steps: 1.17e+04
TotalSteps: 3.26e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.000202


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0045   9.0125   2.2441   3.0464
ADVA:  (19797,) (35285,) 0.5610599404846252
ADV1:  0.00043973171083921004 0.00031011731251894345 0.005

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0019   0.0067   9.0125   2.2441   3.0464
ADVA:  (19880,) (35044,) 0.5672868394018947
ADV1:  0.0 -0.00028919580345747293 0.005252861137437718 0.042876467790724127 -0.054868483642737075
ADVB:  (17739,) (35044,) 0.5061922155005136
ADV2:  0.012639341852586822 0.31483629224206694 0.49344122868257606 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6279   0.2555   1.0594 130.3035  38.5834  37.2821
***** Episode 88162, Mean R = -9.6  Std R = 5.2  Min R = -23.8
PolicyLoss: 1.82
Policy_Entropy: 0.233
Policy_KL: 0.00706
Policy_SD: 0.526
Steps: 1.16e+04
TotalSteps: 3.27e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 7.87e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0016   0.0060   9.0125   2.2441   3.0464
ADVA:  (19928,) (35095,) 0.567830175238638
ADV1:  0.00032904401404500357 6.233997

cs_angles |  0.0018  0.0025 |  0.0758  0.0763 | -0.9817 -0.9860 |  0.9863  0.9963
optical_flow | -0.0000 -0.0001 |  0.0186  0.0192 | -1.1233 -0.9965 |  1.0637  1.0414
v_err    | -0.0111 |  0.0604 | -0.4535 |  0.0971
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -14.88 |    4.16 |  -39.72 |   -8.35
steps    |     378 |      20 |     331 |     416
***** Episode 88410, Mean R = -9.8  Std R = 4.7  Min R = -23.5
PolicyLoss: 1.7
Policy_Entropy: 0.234
Policy_KL: 0.00653
Policy_SD: 0.54
Steps: 1.16e+04
TotalSteps: 3.28e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.000302


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0005   0.0019   9.0125   2.2441   3.0464
ADVA:  (20220,) (35036,) 0.5771206758762416
ADV1:  0.00011345246806278991 -0.0002478140527117893 0.006612219776356793 0

thrust   |   -0.00    0.00   -0.00 |    0.67    0.65    0.67 |   -3.33   -3.39   -3.17 |    3.36    3.36    3.34
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.47 |    0.16 |    1.11 |    1.97
rewards  |   -9.24 |    4.64 |  -27.35 |   -1.97
fuel_rewards |   -4.20 |    0.47 |   -5.65 |   -3.18
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    3.13 |   13.85 |    0.01 | 1018.60
norm_af  |    1.80 |    0.91 |    0.03 |    3.33
norm_wf  |    0.01 |    0.01 |    0.00 |    0.04
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |   -0.08    0.02    0.05 |    1.20    0.65    1.90 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |   -0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02    0.01 |    0.66

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8579   0.4154   1.8632 130.3035  38.5834  37.2821
***** Episode 88999, Mean R = -7.6  Std R = 4.0  Min R = -18.3
PolicyLoss: 1.57
Policy_Entropy: 0.235
Policy_KL: 0.00633
Policy_SD: 0.528
Steps: 1.17e+04
TotalSteps: 3.3e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 1.79e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0030   9.0125   2.2441   3.0464
ADVA:  (20410,) (35215,) 0.5795825642481897
ADV1:  0.0 -0.0005057624128837646 0.005882663700220931 0.05834675389925098 -0.08084351485177116
ADVB:  (17073,) (35215,) 0.4848218088882578
ADV2:  0.0 0.2427077341357552 0.40747186969914423 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5911   0.2243   1.0368 130.3035  38.5834  37.2821
Update Cnt = 2870    ET =   1849.1   Stats:  Mean, Std, Min, Max
r_f      |   -3.00    8.07   -5.83 |  182.76  167.23  205.43 | -391.77 -391.90 -382.59 |  387.22  391.77  376.12
v_f      |    0.00

ADVA:  (19254,) (35165,) 0.5475330584387885
ADV1:  0.0 -0.0006495890017103334 0.00638367024452241 0.03527456525191097 -0.06181775086502243
ADVB:  (18667,) (35165,) 0.5308403241859804
ADV2:  0.054146976436606306 0.292582850257666 0.42840527076570173 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6814   0.2769   1.3444 130.3035  38.5834  37.2821
***** Episode 89247, Mean R = -11.5  Std R = 5.9  Min R = -31.2
PolicyLoss: 1.61
Policy_Entropy: 0.235
Policy_KL: 0.00795
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 3.31e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00019


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0039   0.0019   0.0067   9.0125   2.2441   3.0464
ADVA:  (19640,) (35142,) 0.5588754197256843
ADV1:  0.00025202599495510736 -0.00022234326650780861 0.006737468156652996 0.04360019008836147 -0.07325936250128945
ADVB:  (20200,) (35142,) 0.5748107677423027
ADV2:  0.12970585208728208 0.3358594270144659 0.45972651456669633 3.0 0.0
Polic

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0002   0.0006   9.0125   2.2441   3.0464
ADVA:  (19820,) (35302,) 0.5614412781145545
ADV1:  0.0 -0.0002409622087494516 0.005795601542502633 0.08477137342950503 -0.05180202766934383
ADVB:  (18755,) (35302,) 0.5312730156931619
ADV2:  0.0629563175161949 0.3227093758875098 0.48457740564883417 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4411   0.1712   0.7842 130.3035  38.5834  37.2821
***** Episode 89495, Mean R = -10.0  Std R = 5.1  Min R = -26.4
PolicyLoss: 1.77
Policy_Entropy: 0.235
Policy_KL: 0.0124
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps: 3.32e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.000156


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0030   9.0125   2.2441   3.0464
ADVA:  (19039,) (34969,) 0.5444536589550745
ADV1:  0.0002555006427352891 0.000170705249

***** Episode 89712, Mean R = -8.4  Std R = 5.2  Min R = -26.0
PolicyLoss: 1.7
Policy_Entropy: 0.235
Policy_KL: 0.00908
Policy_SD: 0.524
Steps: 1.18e+04
TotalSteps: 3.33e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 9.75e-05


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0039   9.0125   2.2441   3.0464
ADVA:  (19414,) (35269,) 0.5504550738608976
ADV1:  0.0004227537920358671 0.0001231415349891907 0.005775368803163471 0.05210464336865933 -0.1001511074850081
ADVB:  (20157,) (35269,) 0.5715217329666279
ADV2:  0.13298215515735498 0.3590533035785903 0.5052923927313725 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9157   0.3855   1.7688 130.3035  38.5834  37.2821
***** Episode 89743, Mean R = -7.7  Std R = 4.6  Min R = -24.6
PolicyLoss: 1.84
Policy_Entropy: 0.235
Policy_KL: 0.00765
Policy_SD: 0.519
Steps: 1.18e+04
TotalSteps: 3.33e+07


***** Episode 89960, Mean R = -9.7  Std R = 4.5  Min R = -23.8
PolicyLoss: 1.96
Policy_Entropy: 0.234
Policy_KL: 0.00702
Policy_SD: 0.531
Steps: 1.16e+04
TotalSteps: 3.34e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 3.33e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0008   9.0125   2.2441   3.0464
ADVA:  (18910,) (35207,) 0.5371090976226318
ADV1:  0.0 -0.00018271178630845135 0.00561692261705353 0.038118788150402594 -0.0591847936842195
ADVB:  (19560,) (35207,) 0.5555713352458318
ADV2:  0.10776458232553143 0.3640456066017318 0.5157649018977158 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.5975   1.8025   7.1013 130.3035  38.5834  37.2821
***** Episode 89991, Mean R = -8.4  Std R = 3.9  Min R = -20.4
PolicyLoss: 1.92
Policy_Entropy: 0.234
Policy_KL: 0.00856
Policy_SD: 0.535
Steps: 1.17e+04
TotalSteps: 3.34e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.000143


Dynamics: Max Disturbance (m/s

attitude |   -0.06    0.00    0.08 |    1.23    0.68    1.85 |   -3.14   -1.53   -3.14 |    3.14    1.54    3.14
w        |   -0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.00    0.09 |    0.67    1.84 |   -1.46   -3.13 |    1.49    3.14
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.01   -0.02 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.42
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -1.00   -1.00 |    0.99    0.99
cs_angles |  0.0009  0.0042 |  0.0724  0.0764 | -0.9959 -0.9989 |  0.9854  0.9896
optical_flow | -0.0001 -0.0001 |  0.0208  0.0204 | -0.9637 -1.2110 |  0.9925  1.1924
v_err    | -0.0116 |  0.0607 | -0.4534 |  0.0908
landing_rewards |    9.77 |    1.49 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.3464   1.5728   6.0518 130.3035  38.5834  37.2821
Update Cnt = 2920    ET =   1548.6   Stats:  Mean, Std, Min, Max
r_f      |  -10.20  -10.03    4.09 |  188.03  180.79  194.58 | -385.99 -378.88 -384.18 |  381.60  394.75  375.15
v_f      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.08 |    0.09    0.09    0.10
r_i      |  -56.58  -39.92    8.75 |  691.46  673.91  743.75 |-1252.51-1366.04-1275.51 | 1321.46 1300.23 1281.04
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.01 |    0.32
norm_vf  |    0.07 |    0.01 |    0.04 |    0.12
gs_f     |    1.51 |    3.62 |    0.01 |   42.30
thrust   |   -0.00   -0.00    0.00 |    0.67    0.67    0.67 |   -3.24   -3.46   -3.43 |    3.43    3.41    3.42
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.48 |    0.18 |    1.06 |    2.12
rewards  |   -9.47 

ADVA:  (18922,) (35778,) 0.5288724914752082
ADV1:  0.0002411434546202955 5.587269655117085e-05 0.004944982586870586 0.04038964877392187 -0.05587207042094241
ADVB:  (20460,) (35778,) 0.5718598021130303
ADV2:  0.13650601428981415 0.3688032365058274 0.5032447975016793 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3889   0.5384   2.6531 130.3035  38.5834  37.2821
***** Episode 90828, Mean R = -8.1  Std R = 4.6  Min R = -21.6
PolicyLoss: 1.88
Policy_Entropy: 0.236
Policy_KL: 0.0077
Policy_SD: 0.523
Steps: 1.2e+04
TotalSteps: 3.37e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 5.93e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0046   9.0125   2.2441   3.0464
ADVA:  (20077,) (35621,) 0.5636281968501726
ADV1:  0.0 -0.00041562864164749954 0.005326287743273774 0.0415172553775075 -0.05300774399147085
ADVB:  (16913,) (35621,) 0.4748041885404677
ADV2:  0.0 0.27992674136100065 0.47567981239500334 3.0 0.0
Policy  Gradients: u/sd/Ma

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0003   0.0012   9.0125   2.2441   3.0464
ADVA:  (18296,) (35161,) 0.5203492505901425
ADV1:  0.0 -0.00024453631038778747 0.005791566968146354 0.042448137825609134 -0.11388879226268495
ADVB:  (19249,) (35161,) 0.5474531441085293
ADV2:  0.08565846649554444 0.3521835752406797 0.516083684752306 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7152   0.3138   1.4557 130.3035  38.5834  37.2821
***** Episode 91076, Mean R = -9.3  Std R = 5.9  Min R = -34.5
PolicyLoss: 1.87
Policy_Entropy: 0.236
Policy_KL: 0.00812
Policy_SD: 0.525
Steps: 1.16e+04
TotalSteps: 3.38e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 5.07e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0036   9.0125   2.2441   3.0464
ADVA:  (18700,) (35080,) 0.5330672748004561
ADV1:  0.00012932841736680058 1.1779225890

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0011   0.0037   9.0125   2.2441   3.0464
ADVA:  (19472,) (35404,) 0.5499943509208
ADV1:  0.0009265360146384341 0.0005570424271222746 0.006739377387992472 0.0430251337395573 -0.08443781910625126
ADVB:  (22288,) (35404,) 0.6295333860580725
ADV2:  0.23212582275447802 0.4208760028613278 0.5413732094439219 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6320   0.2593   1.2321 130.3035  38.5834  37.2821
***** Episode 91324, Mean R = -10.8  Std R = 5.7  Min R = -26.8
PolicyLoss: 1.96
Policy_Entropy: 0.235
Policy_KL: 0.00604
Policy_SD: 0.54
Steps: 1.2e+04
TotalSteps: 3.39e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 7.12e-05


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0020   9.0125   2.2441   3.0464
ADVA:  (18917,) (35253,) 0.5366068136045159
ADV1:  0.0006389163692813934 0.

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0019   9.0125   2.2441   3.0464
ADVA:  (19257,) (35231,) 0.5465924895688457
ADV1:  0.0 -0.00031634590346771475 0.00539508886710602 0.06090101042883739 -0.061515850283871576
ADVB:  (17919,) (35231,) 0.5086145723936306
ADV2:  0.01792507949206148 0.33343815875260885 0.5047219284883832 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4797   0.1707   0.8162 130.3035  38.5834  37.2821
***** Episode 91572, Mean R = -9.6  Std R = 4.9  Min R = -22.7
PolicyLoss: 1.91
Policy_Entropy: 0.235
Policy_KL: 0.00925
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 3.4e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 7.04e-05


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0005   0.0025   9.0125   2.2441   3.0464
ADVA:  (18853,) (35191,) 0.53573356824188
ADV1:  0.0 -0.00017634726235228604 0.0053141

theta_cv |    0.21 |    0.25 |    0.00 |    1.50
seeker_angles |   -0.00    0.00 |    0.07    0.08 |   -0.95   -0.95 |    0.86    1.00
cs_angles | -0.0006  0.0003 |  0.0747  0.0769 | -0.9516 -0.9527 |  0.8639  0.9988
optical_flow | -0.0001  0.0000 |  0.0192  0.0185 | -0.9370 -1.0997 |  0.8883  1.0596
v_err    | -0.0111 |  0.0605 | -0.4538 |  0.1027
landing_rewards |    9.74 |    1.59 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.86 |    3.96 |  -33.34 |   -8.35
steps    |     379 |      20 |     336 |     421
***** Episode 91820, Mean R = -10.5  Std R = 6.6  Min R = -29.4
PolicyLoss: 1.49
Policy_Entropy: 0.235
Policy_KL: 0.00996
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 3.41e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 6.98e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0015   0.0045   9.0125   2.2441   3.0464
ADVA:  (20924,) (35396,) 0.5911402418352356
ADV1:  0.0004433004714241833 -0.

attitude |   -0.00   -0.01    0.08 |    1.05    0.67    1.74 |   -3.14   -1.51   -3.14 |    3.14    1.57    3.14
w        |   -0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.00    0.08 |    0.68    1.73 |   -1.50   -3.08 |    1.55    3.13
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.24 |    0.00 |    1.45
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.98   -0.99 |    0.99    1.00
cs_angles |  0.0004  0.0015 |  0.0716  0.0710 | -0.9845 -0.9900 |  0.9873  0.9958
optical_flow | -0.0001  0.0001 |  0.0207  0.0203 | -1.0854 -0.9447 |  1.0486  0.9628
v_err    | -0.0114 |  0.0607 | -0.4575 |  0.1004
landing_rewards |    9.77 |    1.49 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4831   0.7260   3.0883 130.3035  38.5834  37.2821
Update Cnt = 2980    ET =   1502.9   Stats:  Mean, Std, Min, Max
r_f      |   -9.80    7.30    5.12 |  181.68  175.67  196.66 | -382.47 -383.64 -382.50 |  387.40  385.36  381.44
v_f      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.08    0.09
r_i      |  -29.32    5.36   14.71 |  671.82  668.85  767.29 |-1369.73-1256.50-1304.75 | 1340.43 1292.50 1343.58
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.09    0.10
norm_rf  |    0.13 |    0.05 |    0.02 |    0.31
norm_vf  |    0.07 |    0.01 |    0.04 |    0.12
gs_f     |    1.42 |    2.34 |    0.00 |   21.22
thrust   |    0.00   -0.00   -0.00 |    0.66    0.67    0.66 |   -3.13   -3.44   -3.26 |    3.27    3.41    3.39
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.47 |    0.17 |    1.07 |    2.06
rewards  |   -9.20 

ADVA:  (18395,) (35351,) 0.520353031031654
ADV1:  0.0004494260906205919 0.0002568447628770216 0.005444716186094925 0.03399690857613627 -0.061083370831492234
ADVB:  (21371,) (35351,) 0.6045373539645271
ADV2:  0.19755241329614373 0.40740387368128844 0.5383047065241058 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3530   0.4709   2.2798 130.3035  38.5834  37.2821
***** Episode 92688, Mean R = -9.4  Std R = 5.5  Min R = -22.5
PolicyLoss: 1.96
Policy_Entropy: 0.236
Policy_KL: 0.00716
Policy_SD: 0.529
Steps: 1.19e+04
TotalSteps: 3.44e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 2.9e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0041   0.0019   0.0077   9.0125   2.2441   3.0464
ADVA:  (20004,) (35557,) 0.5625896447956802
ADV1:  0.0 -0.0006666322539795425 0.006494081929683331 0.03272320387338873 -0.0832789367945454
ADVB:  (19231,) (35557,) 0.5408499029726918
ADV2:  0.0648953296575789 0.2832458950980787 0.4239236653930406 3.0 0.0
Policy  Gra

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0005   0.0022   9.0125   2.2441   3.0464
ADVA:  (19928,) (35020,) 0.5690462592804112
ADV1:  0.00046718734807383797 0.0004444155855134295 0.005007253115002882 0.056762828296876344 -0.05476829169539004
ADVB:  (18945,) (35020,) 0.5409765848086807
ADV2:  0.09002605027541485 0.36943662338471844 0.5290851143532871 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6883   0.2570   1.1917 130.3035  38.5834  37.2821
***** Episode 92936, Mean R = -9.7  Std R = 5.0  Min R = -21.4
PolicyLoss: 2
Policy_Entropy: 0.235
Policy_KL: 0.0102
Policy_SD: 0.533
Steps: 1.16e+04
TotalSteps: 3.45e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 9.84e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0007   9.0125   2.2441   3.0464
ADVA:  (18104,) (34970,) 0.5177008864741207
ADV1:  0.000147058982842041

***** Episode 93153, Mean R = -7.5  Std R = 5.1  Min R = -28.2
PolicyLoss: 1.85
Policy_Entropy: 0.235
Policy_KL: 0.00978
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 3.46e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.000211


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0003   9.0125   2.2441   3.0464
ADVA:  (19579,) (35423,) 0.5527199841910623
ADV1:  0.00016797803408829734 -2.9385664126364787e-05 0.005035503157005222 0.03647043782504744 -0.13156534611389842
ADVB:  (18775,) (35423,) 0.5300228664991672
ADV2:  0.06540226141434363 0.36084624915435093 0.5203835983009023 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4222   0.4985   2.7425 130.3035  38.5834  37.2821
***** Episode 93184, Mean R = -7.8  Std R = 4.2  Min R = -18.8
PolicyLoss: 1.99
Policy_Entropy: 0.235
Policy_KL: 0.0104
Policy_SD: 0.525
Steps: 1.18e+04
TotalSteps: 3.46

ADVA:  (20889,) (35400,) 0.5900847457627119
ADV1:  0.0011820649833471442 0.0007460126858841125 0.0063316303204700615 0.07569244688073917 -0.08176121436323186
ADVB:  (22801,) (35400,) 0.6440960451977401
ADV2:  0.24003769695892765 0.40467565943428924 0.5055355803589177 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.1299   1.2911   6.4945 130.3035  38.5834  37.2821
***** Episode 93401, Mean R = -8.1  Std R = 3.3  Min R = -15.4
PolicyLoss: 1.84
Policy_Entropy: 0.235
Policy_KL: 0.0205
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 3.47e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 8.08e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0004   9.0125   2.2441   3.0464
ADVA:  (20963,) (35379,) 0.5925266400972328
ADV1:  0.0010702950678652956 0.000934207108480534 0.00567663579772442 0.07569244688073917 -0.08176121436323186
ADVB:  (22105,) (35379,) 0.6248056756833149
ADV2:  0.21256583769264473 0.41262687171109075 0.5337820169594806 

attitude |    0.11   -0.02    0.07 |    1.11    0.64    1.84 |   -3.14   -1.57   -3.14 |    3.14    1.55    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.02    0.04 |    0.64    1.84 |   -1.50   -3.14 |    1.53    3.10
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.38
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.96   -0.97 |    0.99    0.98
cs_angles |  0.0025  0.0038 |  0.0732  0.0758 | -0.9562 -0.9667 |  0.9944  0.9780
optical_flow | -0.0001 -0.0001 |  0.0182  0.0180 | -1.1047 -0.8814 |  1.2110  1.1072
v_err    | -0.0115 |  0.0609 | -0.4628 |  0.0940
landing_rewards |    9.81 |    1.38 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (21375,) (35035,) 0.6101041815327529
ADV1:  0.00025392090178894856 -0.000450105628730337 0.006812119515537942 0.045136655193879494 -0.08933220390079466
ADVB:  (18579,) (35035,) 0.530298273155416
ADV2:  0.04872712725578558 0.2661170310654094 0.38896710221253156 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1442   0.4913   2.0149 130.3035  38.5834  37.2821
Update Cnt = 3030    ET =   1406.0   Stats:  Mean, Std, Min, Max
r_f      |    5.33   12.40    9.72 |  185.74  182.04  192.36 | -388.03 -391.92 -380.15 |  384.35  387.72  390.75
v_f      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.11 |    0.10    0.09    0.11
r_i      |   24.75   55.47   17.89 |  669.93  669.39  763.24 |-1374.57-1343.98-1301.93 | 1300.44 1338.45 1314.26
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.01 |    0.35
norm_vf  |    0.07 |    0.01 |    0.03 |    0.12
gs_f  

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0004   0.0015   9.0125   2.2441   3.0464
ADVA:  (19374,) (35380,) 0.5475975127190503
ADV1:  0.000458950414808177 0.000119753223113658 0.006231514320516109 0.07177825546790706 -0.08413616674362995
ADVB:  (20842,) (35380,) 0.5890898812888637
ADV2:  0.14762961841853056 0.3424898628512259 0.4628908783391763 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4786   0.2139   1.0014 130.3035  38.5834  37.2821
***** Episode 94238, Mean R = -8.3  Std R = 3.8  Min R = -17.5
PolicyLoss: 1.7
Policy_Entropy: 0.236
Policy_KL: 0.0059
Policy_SD: 0.53
Steps: 1.16e+04
TotalSteps: 3.5e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 2.75e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0005   0.0021   9.0125   2.2441   3.0464
ADVA:  (19850,) (35150,) 0.5647226173541963
ADV1:  0.0006525294296693025 0.0

***** Episode 94455, Mean R = -7.5  Std R = 3.3  Min R = -15.9
PolicyLoss: 1.79
Policy_Entropy: 0.236
Policy_KL: 0.00784
Policy_SD: 0.521
Steps: 1.18e+04
TotalSteps: 3.51e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.000173


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0013   0.0043   9.0125   2.2441   3.0464
ADVA:  (20138,) (35007,) 0.5752563772959693
ADV1:  0.0 -0.0005085953972388757 0.005454990818414281 0.041706958637691416 -0.059584320030686394
ADVB:  (17483,) (35007,) 0.49941440283371896
ADV2:  0.0 0.2820019575926282 0.4506472605206499 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5264   0.2153   1.0915 130.3035  38.5834  37.2821
***** Episode 94486, Mean R = -9.9  Std R = 4.3  Min R = -19.0
PolicyLoss: 1.64
Policy_Entropy: 0.236
Policy_KL: 0.0079
Policy_SD: 0.523
Steps: 1.15e+04
TotalSteps: 3.51e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.000117


Dynamics: Max Disturbance (m/s^2):  [0.00143

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2954   0.1056   0.4907 130.3035  38.5834  37.2821
***** Episode 94703, Mean R = -8.6  Std R = 5.0  Min R = -19.0
PolicyLoss: 1.59
Policy_Entropy: 0.236
Policy_KL: 0.00737
Policy_SD: 0.52
Steps: 1.18e+04
TotalSteps: 3.52e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 8.39e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0032   0.0010   0.0051   9.0125   2.2441   3.0464
ADVA:  (18466,) (35193,) 0.5247066177933112
ADV1:  0.0010326555126173803 0.0008131545730286146 0.006592813340929382 0.03616896445957171 -0.05856631625920011
ADVB:  (22736,) (35193,) 0.6460375642883528
ADV2:  0.3027457395765244 0.506393641506359 0.6016756929602204 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9720   0.4402   2.0248 130.3035  38.5834  37.2821
***** Episode 94734, Mean R = -9.1  Std R = 3.2  Min R = -15.0
PolicyLoss: 2.29
Policy_Entropy: 0.237
Policy_KL: 0.00652
Policy_SD: 0.519
Steps: 1.17e+04
TotalSteps: 

ADVA:  (18062,) (35320,) 0.5113816534541337
ADV1:  0.0003026390036814548 -0.00014059274149730004 0.007112082527810317 0.03985467138068077 -0.10029172208482523
ADVB:  (21527,) (35320,) 0.609484711211778
ADV2:  0.21650044442237176 0.41826990749848664 0.5168045922187011 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4626   0.1662   0.8285 130.3035  38.5834  37.2821
***** Episode 94951, Mean R = -10.0  Std R = 4.9  Min R = -29.5
PolicyLoss: 2
Policy_Entropy: 0.237
Policy_KL: 0.00594
Policy_SD: 0.529
Steps: 1.19e+04
TotalSteps: 3.53e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 5.55e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0010   0.0034   9.0125   2.2441   3.0464
ADVA:  (18437,) (35568,) 0.5183591992802519
ADV1:  0.0011522218674860831 0.0007504799822158829 0.0065711613145290615 0.05863516043359457 -0.07769077638623029
ADVB:  (23919,) (35568,) 0.6724865047233468
ADV2:  0.33894028127701564 0.5044134240464866 0.5661671931454806

rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |   -0.00    0.01    0.04 |    1.19    0.65    1.87 |   -3.14   -1.55   -3.14 |    3.14    1.56    3.14
w        |   -0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02    0.02 |    0.65    1.87 |   -1.43   -3.14 |    1.54    3.12
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.01   -0.02 |    0.03    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.50
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.99   -0.98 |    0.96    0.99
cs_angles |  0.0013  0.0017 |  0.0729  0.0762 | -0.9947 -0.9786 |  0.9589  0.9915
optical_flow | -0.0000  0.0001 |  0.0190  0.0177 | -0.91

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0006   0.0026   9.0125   2.2441   3.0464
ADVA:  (19208,) (35415,) 0.5423690526613018
ADV1:  0.0005793336924835076 0.0003882108772998257 0.005424997529792729 0.04354157205223813 -0.07055778862142675
ADVB:  (20834,) (35415,) 0.588281801496541
ADV2:  0.18735370581979507 0.41032764380530684 0.5254374708348567 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7394   0.2757   1.3865 130.3035  38.5834  37.2821
Update Cnt = 3080    ET =   1372.9   Stats:  Mean, Std, Min, Max
r_f      |    6.59   -6.06  -23.83 |  193.74  174.71  192.83 | -394.48 -376.16 -398.06 |  382.73  372.80  390.63
v_f      |   -0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.12   -0.10   -0.11 |    0.10    0.09    0.09
r_i      |   19.78  -18.82  -76.71 |  695.52  652.42  766.74 |-1315.70-1364.69-1269.56 | 1323.65 1258.21 1328.79
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
nor

***** Episode 95757, Mean R = -8.8  Std R = 3.3  Min R = -18.1
PolicyLoss: 1.79
Policy_Entropy: 0.235
Policy_KL: 0.121
Policy_SD: 0.532
Steps: 1.16e+04
TotalSteps: 3.56e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.000211


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0011   0.0038   9.0125   2.2441   3.0464
ADVA:  (19954,) (34816,) 0.5731272977941176
ADV1:  0.000126356879670865 -0.000174890078443043 0.006004800485919388 0.03694302279147693 -0.07971960327044669
ADVB:  (19103,) (34816,) 0.5486845128676471
ADV2:  0.08402387607099468 0.32103324590439913 0.46757024424545396 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.6208   2.2478   9.8283 130.3035  38.5834  37.2821
***** Episode 95788, Mean R = -9.8  Std R = 4.4  Min R = -18.0
PolicyLoss: 1.7
Policy_Entropy: 0.236
Policy_KL: 0.0202
Policy_SD: 0.535
Steps: 1.17e+04
TotalSteps: 3.56e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.00014


Dynamics: Max Di

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8143   0.3296   1.6349 130.3035  38.5834  37.2821
***** Episode 96005, Mean R = -9.5  Std R = 3.9  Min R = -17.6
PolicyLoss: 1.85
Policy_Entropy: 0.238
Policy_KL: 0.00535
Policy_SD: 0.52
Steps: 1.17e+04
TotalSteps: 3.57e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 2.02e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0011   0.0056   9.0125   2.2441   3.0464
ADVA:  (20275,) (35028,) 0.5788226561607857
ADV1:  0.00010815787410214939 -0.0005005226032617477 0.007068072761590524 0.06954242195471383 -0.05928970287072429
ADVB:  (19458,) (35028,) 0.5554984583761562
ADV2:  0.08711256679809262 0.2977928655160636 0.4191267240109207 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4819   0.2196   1.0053 130.3035  38.5834  37.2821
***** Episode 96036, Mean R = -9.7  Std R = 4.1  Min R = -18.3
PolicyLoss: 1.56
Policy_Entropy: 0.238
Policy_KL: 0.00546
Policy_SD: 0.527
Steps: 1.16e+04
TotalSte

ADVA:  (20791,) (35114,) 0.5921000170872017
ADV1:  0.000832884041927771 0.0003247788093811167 0.006619831321679021 0.05905023372921919 -0.0743168852578846
ADVB:  (20995,) (35114,) 0.5979096656604204
ADV2:  0.1768108421010428 0.37385563792964327 0.494671031314954 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7780   0.2483   1.4007 130.3035  38.5834  37.2821
***** Episode 96253, Mean R = -9.9  Std R = 4.1  Min R = -18.9
PolicyLoss: 1.83
Policy_Entropy: 0.237
Policy_KL: 0.00884
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 3.58e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 3.24e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0004   0.0013   9.0125   2.2441   3.0464
ADVA:  (19631,) (35139,) 0.5586670081675631
ADV1:  0.0010655342872638082 0.0005401267882727747 0.006304083755877294 0.05905023372921919 -0.07478318185957022
ADVB:  (22738,) (35139,) 0.6470872819374485
ADV2:  0.2850107415013789 0.45012137414483894 0.5419719252577366 3.0 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0022   9.0125   2.2441   3.0464
ADVA:  (20051,) (35040,) 0.5722317351598174
ADV1:  0.0008158723169951372 0.00046220906075593413 0.006602839498377825 0.05221251778905156 -0.07953525677878626
ADVB:  (20907,) (35040,) 0.5966609589041096
ADV2:  0.1695434776772763 0.39443913674931325 0.5253176964929881 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6553   0.2363   1.3072 130.3035  38.5834  37.2821
***** Episode 96501, Mean R = -8.4  Std R = 3.7  Min R = -17.5
PolicyLoss: 1.92
Policy_Entropy: 0.237
Policy_KL: 0.00632
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 3.59e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 7.53e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0010   0.0033   9.0125   2.2441   3.0464
ADVA:  (20955,) (35346,) 0.5928535053471397
ADV1:  0.0009861299158495287 0.0003062012351270295 0.00669778258512963 0.05221251778905156 -0.07953525677878626
ADVB:  

attitude |    0.05   -0.03    0.12 |    1.19    0.65    1.85 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |   -0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.04    0.09 |    0.66    1.84 |   -1.55   -3.14 |    1.49    3.14
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.65
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.97   -0.90 |    0.97    0.99
cs_angles |  0.0032  0.0020 |  0.0748  0.0746 | -0.9701 -0.9012 |  0.9670  0.9872
optical_flow | -0.0002  0.0000 |  0.0173  0.0176 | -1.0078 -0.9576 |  0.9512  0.8929
v_err    | -0.0120 |  0.0610 | -0.4597 |  0.1045
landing_rewards |    9.61 |    1.93 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2629   0.4927   2.2799 130.3035  38.5834  37.2821
Update Cnt = 3130    ET =   1332.6   Stats:  Mean, Std, Min, Max
r_f      |    5.22    3.37  -11.44 |  174.92  176.21  197.74 | -394.33 -382.45 -395.56 |  384.54  392.59  371.76
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.09    0.11
r_i      |    1.87   21.35  -28.99 |  644.76  666.38  793.25 |-1265.39-1311.48-1303.16 | 1301.57 1333.57 1338.08
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.08    0.09
norm_rf  |    0.14 |    0.06 |    0.02 |    0.43
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.66 |    3.92 |    0.00 |   57.56
thrust   |    0.00   -0.00   -0.00 |    0.65    0.66    0.66 |   -3.32   -3.29   -3.30 |    3.36    3.46    3.37
norm_thrust |    0.89 |    0.71 |    0.00 |    3.46
fuel     |    1.45 |    0.17 |    1.10 |    2.17
rewards  |   -9.03 

ADVA:  (21934,) (35439,) 0.6189226558311465
ADV1:  0.0 -0.000935134296245327 0.005995850331707965 0.0309077224531647 -0.07227370985406473
ADVB:  (15869,) (35439,) 0.4477835153362115
ADV2:  0.0 0.20783459020767703 0.3637953839368513 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.3352   2.4035   9.2487 130.3035  38.5834  37.2821
***** Episode 97338, Mean R = -10.0  Std R = 5.1  Min R = -19.1
PolicyLoss: 1.34
Policy_Entropy: 0.238
Policy_KL: 0.0135
Policy_SD: 0.531
Steps: 1.17e+04
TotalSteps: 3.62e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.000242


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0012   9.0125   2.2441   3.0464
ADVA:  (22055,) (35102,) 0.6283117771067176
ADV1:  0.0009575769380381783 -0.00025852483062118283 0.007844670172384122 0.0309077224531647 -0.07227370985406473
ADVB:  (22294,) (35102,) 0.6351205059540767
ADV2:  0.18081641457197523 0.30550524130316664 0.382889033621644 3.0 0.0
Policy  Gradients: u/sd/Ma

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0018   0.0064   9.0125   2.2441   3.0464
ADVA:  (22098,) (35019,) 0.6310288700419772
ADV1:  0.0 -0.001040778090379712 0.008315523954601183 0.0521970793114625 -0.09607533356409181
ADVB:  (17709,) (35019,) 0.5056969073931294
ADV2:  0.009199080223313146 0.25932706841971054 0.400764058843461 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4165   0.1482   0.7742 130.3035  38.5834  37.2821
***** Episode 97586, Mean R = -10.2  Std R = 5.9  Min R = -27.3
PolicyLoss: 1.49
Policy_Entropy: 0.238
Policy_KL: 0.00767
Policy_SD: 0.523
Steps: 1.15e+04
TotalSteps: 3.63e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.000693


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0036   0.0020   0.0074   9.0125   2.2441   3.0464
ADVA:  (19879,) (35116,) 0.566095227246839
ADV1:  0.0013757763932658856 0.0005977468039789567 0.007816257352241502 0.0521970793114625 -0.09607533356409181
ADVB:  (22616,) (35116,) 

***** Episode 97803, Mean R = -9.0  Std R = 4.6  Min R = -18.3
PolicyLoss: 1.69
Policy_Entropy: 0.238
Policy_KL: 0.00728
Policy_SD: 0.52
Steps: 1.16e+04
TotalSteps: 3.64e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 3.85e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0009   9.0125   2.2441   3.0464
ADVA:  (19172,) (35186,) 0.5448758028761439
ADV1:  0.0004169493335584081 0.00031410498698370017 0.005981916567632842 0.050859080555255176 -0.07312073663614913
ADVB:  (20473,) (35186,) 0.5818507360882169
ADV2:  0.16476889954497337 0.395499880176998 0.5258034283067077 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4617   0.1671   0.8054 130.3035  38.5834  37.2821
***** Episode 97834, Mean R = -8.9  Std R = 4.0  Min R = -22.7
PolicyLoss: 1.97
Policy_Entropy: 0.238
Policy_KL: 0.0065
Policy_SD: 0.529
Steps: 1.17e+04
TotalSteps: 3.64e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 3.59e-05


ValFun  Gradie

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.7530   2.5432  11.5894 130.3035  38.5834  37.2821
***** Episode 98051, Mean R = -9.8  Std R = 4.4  Min R = -21.0
PolicyLoss: 1.66
Policy_Entropy: 0.237
Policy_KL: 0.0135
Policy_SD: 0.53
Steps: 1.2e+04
TotalSteps: 3.65e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 4.37e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0010   0.0039   9.0125   2.2441   3.0464
ADVA:  (20597,) (35476,) 0.5805896944413125
ADV1:  0.0007485243454940158 0.0003632554759355402 0.006429967676522554 0.07242499007131159 -0.04862377960725972
ADVB:  (21703,) (35476,) 0.6117657007554403
ADV2:  0.19351140947549247 0.3888297889914761 0.5050471510684029 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6661   0.6192   2.8521 130.3035  38.5834  37.2821
***** Episode 98082, Mean R = -7.8  Std R = 4.4  Min R = -23.1
PolicyLoss: 1.85
Policy_Entropy: 0.238
Policy_KL: 0.00843
Policy_SD: 0.523
Steps: 1.18e+04
TotalSteps: 

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.01    0.08 |    0.69    1.84 |   -1.51   -3.12 |    1.52    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.01 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.65
seeker_angles |    0.01    0.00 |    0.07    0.08 |   -1.00   -1.00 |    0.95    0.97
cs_angles |  0.0059  0.0013 |  0.0728  0.0767 | -0.9953 -0.9988 |  0.9456  0.9718
optical_flow | -0.0001  0.0000 |  0.0190  0.0182 | -1.0187 -1.1733 |  1.0639  0.9021
v_err    | -0.0124 |  0.0613 | -0.4613 |  0.0940
landing_rewards |    9.81 |    1.38 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -15.01 |    4.24 |  -30.39 |   -8.24
steps    |     381 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3798   0.1169   0.6177 130.3035  38.5834  37.2821
Update Cnt = 3180    ET =   1196.8   Stats:  Mean, Std, Min, Max
r_f      |    2.22   13.87    7.06 |  196.74  171.92  193.33 | -385.05 -367.35 -385.94 |  383.71  383.51  372.90
v_f      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.10    0.09
r_i      |   18.99   23.44   19.04 |  713.49  661.77  735.55 |-1359.79-1302.77-1341.34 | 1317.99 1332.71 1320.18
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.13 |    0.05 |    0.01 |    0.31
norm_vf  |    0.07 |    0.01 |    0.04 |    0.11
gs_f     |    1.23 |    2.28 |    0.00 |   31.81
thrust   |    0.00    0.00   -0.00 |    0.65    0.66    0.66 |   -3.39   -3.22   -3.33 |    3.42    3.41    3.26
norm_thrust |    0.89 |    0.71 |    0.00 |    3.46
fuel     |    1.45 |    0.16 |    1.04 |    2.07
rewards  |   -8.75 

ADVA:  (20082,) (35270,) 0.5693790757017295
ADV1:  0.0008846125472028966 0.0004110986615498779 0.006564988816954202 0.08233639658885611 -0.0718971780765314
ADVB:  (21522,) (35270,) 0.6102069747660902
ADV2:  0.17871430435270813 0.3521926171169467 0.4636249019863553 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3739   0.1594   0.7665 130.3035  38.5834  37.2821
***** Episode 98888, Mean R = -9.1  Std R = 4.6  Min R = -18.6
PolicyLoss: 1.67
Policy_Entropy: 0.238
Policy_KL: 0.00787
Policy_SD: 0.535
Steps: 1.17e+04
TotalSteps: 3.68e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 5.82e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0015   9.0125   2.2441   3.0464
ADVA:  (18132,) (35202,) 0.5150843702062383
ADV1:  0.0005047824544064938 0.0003594967602040823 0.005426214913123791 0.03678490212066865 -0.0718971780765314
ADVB:  (21885,) (35202,) 0.6216976308164309
ADV2:  0.228287874107526 0.4234332202339108 0.5426883604547413 3.0 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0016   0.0050   9.0125   2.2441   3.0464
ADVA:  (20640,) (35243,) 0.5856482138296967
ADV1:  0.0005012654957865939 0.00012804575640096272 0.006529097941593356 0.04865816192781025 -0.07674963679174596
ADVB:  (20662,) (35243,) 0.5862724512669183
ADV2:  0.12992701200141826 0.3358597640267849 0.46745270734242084 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6326   0.2510   1.0835 130.3035  38.5834  37.2821
***** Episode 99136, Mean R = -9.2  Std R = 4.9  Min R = -19.6
PolicyLoss: 1.66
Policy_Entropy: 0.238
Policy_KL: 0.00916
Policy_SD: 0.533
Steps: 1.17e+04
TotalSteps: 3.69e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.000179


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0021   9.0125   2.2441   3.0464
ADVA:  (21373,) (35143,) 0.6081723245027459
ADV1:  0.0008610986075310769 0.0006740836266393394 0.005684693524504155 0.0509733271644493 -0.07662712068631328
ADVB:

***** Episode 99353, Mean R = -8.3  Std R = 4.6  Min R = -22.3
PolicyLoss: 1.84
Policy_Entropy: 0.239
Policy_KL: 0.0108
Policy_SD: 0.534
Steps: 1.19e+04
TotalSteps: 3.7e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 2.78e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0006   0.0021   9.0125   2.2441   3.0464
ADVA:  (18614,) (35452,) 0.5250479521606679
ADV1:  0.0 -0.00034113224214613687 0.005790048308554892 0.04613008302924895 -0.08153468424116922
ADVB:  (19585,) (35452,) 0.552437098048065
ADV2:  0.08150647274563855 0.31830659363143904 0.47530839213304477 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5078   0.1845   1.0800 130.3035  38.5834  37.2821
***** Episode 99384, Mean R = -7.7  Std R = 4.0  Min R = -23.5
PolicyLoss: 1.66
Policy_Entropy: 0.239
Policy_KL: 0.0101
Policy_SD: 0.538
Steps: 1.19e+04
TotalSteps: 3.7e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 4.99e-05


ValFun  Gradients: u/sd/Max/C M

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  18.0726   8.0033  32.7933 130.3035  38.5834  37.2821
***** Episode 99601, Mean R = -9.2  Std R = 3.1  Min R = -21.4
PolicyLoss: 2.02
Policy_Entropy: 0.239
Policy_KL: 0.0177
Policy_SD: 0.524
Steps: 1.18e+04
TotalSteps: 3.7e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 5.03e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0043   9.0125   2.2441   3.0464
ADVA:  (21450,) (35501,) 0.6042083321596575
ADV1:  0.00016527973247427736 -0.00012760549541482668 0.006067360721099719 0.07735074394018732 -0.0627778143403036
ADVB:  (19164,) (35501,) 0.5398157798371876
ADV2:  0.06201045331313137 0.30039015316068884 0.45732189114385396 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.4967   2.7590  12.7854 130.3035  38.5834  37.2821
***** Episode 99632, Mean R = -8.9  Std R = 4.1  Min R = -19.8
PolicyLoss: 1.61
Policy_Entropy: 0.239
Policy_KL: 0.0164
Policy_SD: 0.528
Steps: 1.18e+04
TotalStep

seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.98   -0.97 |    0.92    0.99
cs_angles |  0.0024  0.0016 |  0.0741  0.0722 | -0.9826 -0.9716 |  0.9221  0.9911
optical_flow |  0.0000  0.0002 |  0.0204  0.0191 | -0.9836 -0.8708 |  1.1529  1.1976
v_err    | -0.0122 |  0.0612 | -0.4525 |  0.0998
landing_rewards |    9.68 |    1.77 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.53 |    3.84 |  -29.65 |   -8.46
steps    |     378 |      21 |     335 |     421
***** Episode 99880, Mean R = -9.3  Std R = 5.2  Min R = -24.0
PolicyLoss: 1.56
Policy_Entropy: 0.239
Policy_KL: 0.00523
Policy_SD: 0.524
Steps: 1.17e+04
TotalSteps: 3.72e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000163


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0004   0.0018   9.0125   2.2441   3.0464
ADVA:  (20680,) (35214,) 0.5872664281251775
ADV1:  0.00075387448854667 1.3634221683369077e-05 0.006726937119838873 0.05597740

attitude |    0.13   -0.04   -0.15 |    1.14    0.63    1.78 |   -3.14   -1.56   -3.14 |    3.14    1.54    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.04   -0.16 |    0.63    1.78 |   -1.48   -3.13 |    1.38    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.03    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.24 |    0.00 |    1.61
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.95   -0.97 |    0.98    0.99
cs_angles |  0.0024  0.0008 |  0.0718  0.0708 | -0.9520 -0.9717 |  0.9844  0.9920
optical_flow | -0.0000  0.0001 |  0.0184  0.0185 | -1.0960 -1.0376 |  0.9401  0.8973
v_err    | -0.0118 |  0.0608 | -0.4535 |  0.0971
landing_rewards |    9.77 |    1.49 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  12.4250   3.6301  21.3727 130.3035  38.5834  37.2821
Update Cnt = 3240    ET =   1556.2   Stats:  Mean, Std, Min, Max
r_f      |    5.31   14.97   -2.71 |  196.09  174.68  188.70 | -396.23 -380.27 -395.54 |  399.57  377.89  398.70
v_f      |   -0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.12   -0.09   -0.10 |    0.10    0.11    0.09
r_i      |   -0.47   34.99  -15.69 |  706.43  671.64  725.59 |-1372.74-1291.67-1347.97 | 1309.55 1338.69 1331.41
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.04 |   -0.09   -0.10   -0.09 |    0.10    0.09    0.10
norm_rf  |    0.13 |    0.06 |    0.03 |    0.32
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs_f     |    1.13 |    1.63 |    0.01 |   13.00
thrust   |    0.00    0.00    0.00 |    0.67    0.65    0.66 |   -3.29   -3.43   -3.44 |    3.34    2.98    3.36
norm_thrust |    0.89 |    0.71 |    0.00 |    3.46
fuel     |    1.46 |    0.18 |    1.06 |    2.05
rewards  |   -9.30 

ADVA:  (21099,) (35085,) 0.6013681060282172
ADV1:  0.00033632243935244853 -0.00012037530222634987 0.00644553308148298 0.0423830226891877 -0.09547005446119688
ADVB:  (19421,) (35085,) 0.553541399458458
ADV2:  0.07623877241807062 0.29430187775931493 0.4384012636165458 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5335   0.2076   1.0462 130.3035  38.5834  37.2821
***** Episode 100748, Mean R = -8.3  Std R = 3.8  Min R = -17.2
PolicyLoss: 1.53
Policy_Entropy: 0.24
Policy_KL: 0.00957
Policy_SD: 0.526
Steps: 1.16e+04
TotalSteps: 3.75e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 3.87e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0004   0.0012   9.0125   2.2441   3.0464
ADVA:  (21727,) (35075,) 0.6194440484675695
ADV1:  0.0004890828760036377 -2.586963667826398e-05 0.006742116649626071 0.03410165061307066 -0.07745996943335165
ADVB:  (19839,) (35075,) 0.565616535994298
ADV2:  0.09914626868636021 0.3014815377276516 0.4327624174247185

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0001   0.0001   0.0003   9.0125   2.2441   3.0464
ADVA:  (18654,) (35440,) 0.5263544018058691
ADV1:  0.00015831247460241019 -0.00014738029064057586 0.00547673431736117 0.03618664206645944 -0.05519472253610963
ADVB:  (20407,) (35440,) 0.5758182844243792
ADV2:  0.13321703267927215 0.3457551764224829 0.4730920211677251 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8635   0.3432   1.5638 130.3035  38.5834  37.2821
***** Episode 100996, Mean R = -8.8  Std R = 5.3  Min R = -26.3
PolicyLoss: 1.73
Policy_Entropy: 0.24
Policy_KL: 0.00799
Policy_SD: 0.532
Steps: 1.19e+04
TotalSteps: 3.76e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 2.87e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0003   9.0125   2.2441   3.0464
ADVA:  (18931,) (35335,) 0.5357577472760718
ADV1:  5.647485976566398e-05 -0.00026088147186502267 0.005563651618409206 0.031115047066376245 -0.05519472253610963
A

***** Episode 101213, Mean R = -8.9  Std R = 3.6  Min R = -17.6
PolicyLoss: 1.56
Policy_Entropy: 0.239
Policy_KL: 0.00853
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 3.77e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 2.97e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0002   0.0010   9.0125   2.2441   3.0464
ADVA:  (18930,) (35224,) 0.5374176697706109
ADV1:  0.00017872232264679825 9.633589774963932e-05 0.005010196832942245 0.037828865554033475 -0.0893799483271358
ADVB:  (19666,) (35224,) 0.5583125141948672
ADV2:  0.1171689224900328 0.37247937086554056 0.5196599781698387 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6558   0.2276   1.0664 130.3035  38.5834  37.2821
***** Episode 101244, Mean R = -8.6  Std R = 4.0  Min R = -17.9
PolicyLoss: 1.92
Policy_Entropy: 0.239
Policy_KL: 0.00751
Policy_SD: 0.533
Steps: 1.18e+04
TotalSteps: 3.77e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 5.04e-05


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7578   0.2709   1.4596 130.3035  38.5834  37.2821
***** Episode 101461, Mean R = -9.7  Std R = 8.5  Min R = -50.8
PolicyLoss: 1.67
Policy_Entropy: 0.239
Policy_KL: 0.00979
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 3.78e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 9.59e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0008   9.0125   2.2441   3.0464
ADVA:  (20145,) (35104,) 0.573866226071103
ADV1:  0.0 -0.00018256754452200994 0.005233425469992931 0.05004261641304558 -0.08431033862946763
ADVB:  (16942,) (35104,) 0.48262306289881496
ADV2:  0.0 0.2975708685135356 0.4855707157775014 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0274   0.4345   2.1679 130.3035  38.5834  37.2821
***** Episode 101492, Mean R = -7.9  Std R = 4.9  Min R = -26.2
PolicyLoss: 1.77
Policy_Entropy: 0.239
Policy_KL: 0.0132
Policy_SD: 0.526
Steps: 1.16e+04
TotalSteps: 3.78e+07
VF_0_ExplainedVarNe

w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.03   -0.02 |    0.66    1.88 |   -1.42   -3.13 |    1.53    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.02    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.54
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.98   -0.90 |    0.98    0.88
cs_angles |  0.0019  0.0012 |  0.0745  0.0762 | -0.9808 -0.9047 |  0.9778  0.8828
optical_flow |  0.0002  0.0001 |  0.0185  0.0169 | -1.0698 -0.8395 |  1.1345  0.8266
v_err    | -0.0115 |  0.0609 | -0.4532 |  0.0985
landing_rewards |    9.61 |    1.93 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.81 |    4.64 |  -53.81 |   -7.97
steps    |     376 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5262   0.2012   1.0657 130.3035  38.5834  37.2821
Update Cnt = 3290    ET =   1412.0   Stats:  Mean, Std, Min, Max
r_f      |   10.08   -3.42   16.64 |  181.02  171.12  206.66 | -380.30 -378.61 -378.20 |  398.20  396.75  392.03
v_f      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.11    0.09    0.09
r_i      |   42.53  -25.93   60.44 |  659.99  660.94  784.17 |-1245.55-1316.15-1296.79 | 1357.16 1240.95 1334.52
v_i      |   -0.00    0.00   -0.01 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.14 |    0.05 |    0.02 |    0.31
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs_f     |    1.32 |    1.71 |    0.00 |   13.26
thrust   |    0.00   -0.01   -0.00 |    0.66    0.66    0.66 |   -3.40   -3.28   -3.46 |    3.38    3.44    3.36
norm_thrust |    0.90 |    0.71 |    0.00 |    3.46
fuel     |    1.47 |    0.19 |    1.02 |    2.14
rewards  |   -9.00 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.2535   2.6938   8.5406 130.3035  38.5834  37.2821
***** Episode 102298, Mean R = -7.9  Std R = 4.4  Min R = -25.3
PolicyLoss: 1.96
Policy_Entropy: 0.239
Policy_KL: 0.0107
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 3.81e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 5.11e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0017   9.0125   2.2441   3.0464
ADVA:  (19968,) (35350,) 0.5648656294200849
ADV1:  0.0 -0.00036676935739475927 0.005815416628628175 0.04271918367688721 -0.06765681997264673
ADVB:  (17936,) (35350,) 0.5073833097595474
ADV2:  0.012083584385749652 0.2987047502710784 0.4697013477644345 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.7220   1.8372   7.3781 130.3035  38.5834  37.2821
***** Episode 102329, Mean R = -10.3  Std R = 4.7  Min R = -25.9
PolicyLoss: 1.7
Policy_Entropy: 0.239
Policy_KL: 0.0107
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 3.81e+07
VF_

ADVA:  (19392,) (35504,) 0.5461919783686345
ADV1:  7.392766945371783e-05 -0.0002591442608176086 0.005807935178878296 0.040002939776480395 -0.09432571467883677
ADVB:  (19564,) (35504,) 0.5510365029292474
ADV2:  0.0886728925338548 0.33612691996266597 0.4952400439728643 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7037   0.2891   1.4695 130.3035  38.5834  37.2821
***** Episode 102546, Mean R = -10.4  Std R = 5.9  Min R = -30.2
PolicyLoss: 1.76
Policy_Entropy: 0.239
Policy_KL: 0.00942
Policy_SD: 0.535
Steps: 1.17e+04
TotalSteps: 3.82e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 7.22e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0016   9.0125   2.2441   3.0464
ADVA:  (17270,) (35292,) 0.48934602742831235
ADV1:  0.00022526821925193848 0.0002755405188257925 0.005511158869074584 0.09324506020185575 -0.09432571467883677
ADVB:  (21072,) (35292,) 0.5970758245494729
ADV2:  0.18937276642716466 0.44338679823261595 0.5989090144

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0005   9.0125   2.2441   3.0464
ADVA:  (20928,) (35534,) 0.588957055214724
ADV1:  0.0006388348724081323 4.925435527134763e-06 0.006234859257911139 0.03562016729232309 -0.05101559572140209
ADVB:  (21783,) (35534,) 0.6130185174762199
ADV2:  0.17544719316370272 0.34152360225737427 0.4473391746101309 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5157   0.2438   1.0175 130.3035  38.5834  37.2821
***** Episode 102794, Mean R = -8.1  Std R = 4.7  Min R = -24.4
PolicyLoss: 1.61
Policy_Entropy: 0.24
Policy_KL: 0.00694
Policy_SD: 0.533
Steps: 1.19e+04
TotalSteps: 3.83e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 4.17e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0053   9.0125   2.2441   3.0464
ADVA:  (21611,) (35282,) 0.6125219658749504
ADV1:  0.0004338547593690711 -0.0002712105183380627 0.006773425434252754 0.03582465381246852 -0.066233052799549
ADVB:  (

***** Episode 103011, Mean R = -9.7  Std R = 5.2  Min R = -24.7
PolicyLoss: 1.91
Policy_Entropy: 0.238
Policy_KL: 0.0105
Policy_SD: 0.54
Steps: 1.18e+04
TotalSteps: 3.83e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 3.45e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0005   0.0022   9.0125   2.2441   3.0464
ADVA:  (19447,) (35257,) 0.5515784099611425
ADV1:  0.00014627897901203085 6.579226510110518e-05 0.005409102797504718 0.04801626367965939 -0.061507901037653745
ADVB:  (19303,) (35257,) 0.5474941146439005
ADV2:  0.09135669017452679 0.3574902709896602 0.5202328833082378 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4362   0.1910   1.0526 130.3035  38.5834  37.2821
***** Episode 103042, Mean R = -8.3  Std R = 3.5  Min R = -15.4
PolicyLoss: 1.88
Policy_Entropy: 0.24
Policy_KL: 0.00781
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps: 3.84e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.000195


ValFun  Grad

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.01   -0.04 |    0.68    1.85 |   -1.52   -3.14 |    1.50    3.12
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.01 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.46
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -1.00   -1.00 |    0.97    0.96
cs_angles |  0.0039  0.0042 |  0.0680  0.0760 | -0.9969 -0.9990 |  0.9736  0.9601
optical_flow |  0.0000  0.0000 |  0.0174  0.0181 | -1.0917 -1.1472 |  0.9491  0.8905
v_err    | -0.0113 |  0.0600 | -0.4523 |  0.1030
landing_rewards |    9.74 |    1.59 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -14.23 |    3.91 |  -28.74 |   -8.14
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2754   0.0891   0.4966 130.3035  38.5834  37.2821
Update Cnt = 3340    ET =   1476.0   Stats:  Mean, Std, Min, Max
r_f      |    3.39    3.15  -11.87 |  183.56  163.04  203.21 | -393.06 -394.18 -391.04 |  390.07  392.42  393.97
v_f      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.11 |    0.09    0.09    0.09
r_i      |   19.05  -14.32  -36.07 |  680.77  654.04  754.87 |-1309.92-1330.54-1339.61 | 1296.62 1299.05 1279.13
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.13 |    0.06 |    0.01 |    0.35
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.25 |    1.80 |    0.01 |   17.71
thrust   |   -0.00    0.00    0.01 |    0.68    0.67    0.66 |   -3.46   -3.16   -3.35 |    3.10    3.42    3.28
norm_thrust |    0.91 |    0.72 |    0.00 |    3.46
fuel     |    1.47 |    0.18 |    1.04 |    2.46
rewards  |   -9.43 

ADVA:  (19613,) (35249,) 0.5564129478850464
ADV1:  8.923572593192049e-05 -5.512113246753621e-05 0.005491620681565446 0.06211369825729457 -0.0771746877760266
ADVB:  (19028,) (35249,) 0.5398167323895713
ADV2:  0.06906931633161924 0.32294824115541887 0.4828031430204198 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4665   0.2128   0.9180 130.3035  38.5834  37.2821
***** Episode 103848, Mean R = -8.4  Std R = 3.5  Min R = -16.0
PolicyLoss: 1.73
Policy_Entropy: 0.239
Policy_KL: 0.00797
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 3.87e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 2.93e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0016   9.0125   2.2441   3.0464
ADVA:  (19014,) (35257,) 0.5392971608474912
ADV1:  6.665885175819591e-05 8.176685863590988e-06 0.0050073152690768 0.06211369825729457 -0.07084139500559572
ADVB:  (19036,) (35257,) 0.5399211504098477
ADV2:  0.08018185897181498 0.35574777000951696 0.5105696994380574

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0005   0.0017   9.0125   2.2441   3.0464
ADVA:  (17952,) (35524,) 0.5053484967909019
ADV1:  0.00026396139022606506 0.00028111447512328653 0.005245756863522253 0.050934314190342944 -0.05054196741628341
ADVB:  (21585,) (35524,) 0.6076173854295688
ADV2:  0.20067359558186398 0.43099992109292723 0.5843392111865019 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7255   0.3096   1.4715 130.3035  38.5834  37.2821
***** Episode 104096, Mean R = -8.4  Std R = 4.1  Min R = -20.9
PolicyLoss: 2.04
Policy_Entropy: 0.239
Policy_KL: 0.00861
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 3.88e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.000162


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0043   0.0028   0.0085   9.0125   2.2441   3.0464
ADVA:  (18838,) (35486,) 0.5308572394747224
ADV1:  0.0 -0.0006895835866089327 0.00608564221501071 0.050934314190342944 -0.04813190944752319
ADVB:  (18083,) (3

***** Episode 104313, Mean R = -9.0  Std R = 3.8  Min R = -17.9
PolicyLoss: 1.67
Policy_Entropy: 0.239
Policy_KL: 0.0089
Policy_SD: 0.527
Steps: 1.18e+04
TotalSteps: 3.88e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 2.61e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0012   0.0039   9.0125   2.2441   3.0464
ADVA:  (18079,) (35191,) 0.5137393083458839
ADV1:  0.0008439456523303126 0.0005179764294415923 0.005530908564955505 0.044709468327172985 -0.06996744064310156
ADVB:  (23007,) (35191,) 0.6537751129550169
ADV2:  0.28348705577353567 0.4390991713510982 0.5238479741057845 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8382   0.3223   1.4351 130.3035  38.5834  37.2821
***** Episode 104344, Mean R = -8.9  Std R = 4.5  Min R = -19.5
PolicyLoss: 1.96
Policy_Entropy: 0.239
Policy_KL: 0.00649
Policy_SD: 0.528
Steps: 1.18e+04
TotalSteps: 3.88e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 7.89e-05


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1991   0.6299   2.5088 130.3035  38.5834  37.2821
***** Episode 104561, Mean R = -8.2  Std R = 4.0  Min R = -17.2
PolicyLoss: 1.93
Policy_Entropy: 0.24
Policy_KL: 0.00775
Policy_SD: 0.533
Steps: 1.17e+04
TotalSteps: 3.89e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 3.06e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0028   9.0125   2.2441   3.0464
ADVA:  (18376,) (35164,) 0.5225799112728927
ADV1:  1.109143275421047e-05 4.340738390741377e-05 0.005657068609504842 0.03689639845898918 -0.08806205019615565
ADVB:  (20001,) (35164,) 0.5687919463087249
ADV2:  0.12803541308520058 0.3932315737231397 0.5403284063219258 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6531   0.2732   1.2639 130.3035  38.5834  37.2821
***** Episode 104592, Mean R = -9.0  Std R = 4.1  Min R = -20.6
PolicyLoss: 2
Policy_Entropy: 0.239
Policy_KL: 0.00814
Policy_SD: 0.528
Steps: 1.18e+04
TotalSteps: 

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.03   -0.03 |    0.64    1.94 |   -1.52   -3.12 |    1.53    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.01 |    0.02    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.43
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.93   -0.95 |    0.95    0.98
cs_angles |  0.0042  0.0039 |  0.0771  0.0752 | -0.9311 -0.9505 |  0.9473  0.9821
optical_flow |  0.0001  0.0002 |  0.0176  0.0176 | -0.9251 -0.8144 |  1.0602  1.0490
v_err    | -0.0117 |  0.0608 | -0.4549 |  0.1008
landing_rewards |    9.68 |    1.77 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -14.94 |    4.23 |  -32.53 |   -8.17
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7882   0.3746   1.5910 130.3035  38.5834  37.2821
Update Cnt = 3390    ET =   1438.0   Stats:  Mean, Std, Min, Max
r_f      |   -6.14  -10.46   -5.79 |  182.02  168.64  202.89 | -380.39 -369.81 -389.68 |  381.29  390.90  381.54
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.10    0.09    0.09
r_i      |  -26.41   25.86  -14.01 |  669.32  650.93  778.80 |-1295.95-1301.86-1316.54 | 1350.98 1341.25 1353.33
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.12 |    0.05 |    0.02 |    0.29
norm_vf  |    0.07 |    0.01 |    0.05 |    0.11
gs_f     |    1.29 |    1.82 |    0.01 |   18.97
thrust   |    0.00    0.00    0.00 |    0.67    0.66    0.65 |   -3.45   -3.45   -3.28 |    3.28    3.36    3.40
norm_thrust |    0.89 |    0.71 |    0.00 |    3.46
fuel     |    1.47 |    0.17 |    1.06 |    2.03
rewards  |   -9.69 

ADVA:  (18059,) (35149,) 0.5137841759367265
ADV1:  0.00081742696646038 0.0006442433915181502 0.006105218964796505 0.04955403848555584 -0.0620404021071155
ADVB:  (23177,) (35149,) 0.6593928703519304
ADV2:  0.28042054073173095 0.4621001681527423 0.5499167962353588 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   8.9619   2.7513  13.6041 130.3035  38.5834  37.2821
***** Episode 105398, Mean R = -8.9  Std R = 3.8  Min R = -17.4
PolicyLoss: 2.02
Policy_Entropy: 0.241
Policy_KL: 0.0103
Policy_SD: 0.525
Steps: 1.18e+04
TotalSteps: 3.92e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 2.47e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0010   9.0125   2.2441   3.0464
ADVA:  (16801,) (35326,) 0.47559870916605335
ADV1:  0.0 0.0001505299253901217 0.004426288887672376 0.034889706860290526 -0.08168126377141982
ADVB:  (19599,) (35326,) 0.5548038272094208
ADV2:  0.12386598361497028 0.450005181272468 0.6365160077447973 3.0 0.0
Policy  Grad

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0011   0.0034   9.0125   2.2441   3.0464
ADVA:  (21671,) (35303,) 0.6138571792765487
ADV1:  0.0 -0.0006655491720161193 0.006674249256367791 0.0520266592984881 -0.0681085595861936
ADVB:  (18065,) (35303,) 0.5117128855904597
ADV2:  0.01808909981563012 0.24804145044565065 0.38935468880287044 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3297   0.1225   0.5957 130.3035  38.5834  37.2821
***** Episode 105646, Mean R = -9.7  Std R = 4.5  Min R = -18.2
PolicyLoss: 1.38
Policy_Entropy: 0.242
Policy_KL: 0.00573
Policy_SD: 0.52
Steps: 1.17e+04
TotalSteps: 3.93e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 7.95e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0011   0.0042   9.0125   2.2441   3.0464
ADVA:  (20888,) (35148,) 0.5942870149083874
ADV1:  0.0009268235852863127 0.00018587250217425583 0.006677480447922164 0.07067294629906307 -0.0681085595861936
ADVB:  (22287,) (35148,

***** Episode 105863, Mean R = -10.6  Std R = 5.6  Min R = -26.9
PolicyLoss: 1.43
Policy_Entropy: 0.241
Policy_KL: 0.00982
Policy_SD: 0.528
Steps: 1.17e+04
TotalSteps: 3.94e+07
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 8.47e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0014   0.0047   9.0125   2.2441   3.0464
ADVA:  (21933,) (35318,) 0.6210147799988674
ADV1:  0.001336584468600235 0.00032977761232155453 0.007393247423655306 0.03683761370441152 -0.063843135373849
ADVB:  (23136,) (35318,) 0.6550767314117447
ADV2:  0.23714925799055322 0.3561659876110747 0.4215765726197036 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8417   0.2924   1.3835 130.3035  38.5834  37.2821
***** Episode 105894, Mean R = -8.3  Std R = 4.6  Min R = -25.6
PolicyLoss: 1.56
Policy_Entropy: 0.241
Policy_KL: 0.00652
Policy_SD: 0.526
Steps: 1.18e+04
TotalSteps: 3.94e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 7.06e-05


ValFun  Grad

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9901   0.4223   1.9023 130.3035  38.5834  37.2821
***** Episode 106111, Mean R = -8.4  Std R = 4.7  Min R = -19.7
PolicyLoss: 1.98
Policy_Entropy: 0.241
Policy_KL: 0.0081
Policy_SD: 0.52
Steps: 1.17e+04
TotalSteps: 3.95e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 7.34e-05


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0001   0.0000   0.0001   9.0125   2.2441   3.0464
ADVA:  (19359,) (35167,) 0.5504876731026246
ADV1:  0.00016767252069399205 -8.105580569833932e-05 0.00500476373687217 0.05418599525507639 -0.09385225942033554
ADVB:  (19642,) (35167,) 0.5585349901896665
ADV2:  0.10825476384747092 0.34561855880943054 0.48795202341857463 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4134   0.1540   0.7690 130.3035  38.5834  37.2821
***** Episode 106142, Mean R = -9.6  Std R = 6.1  Min R = -26.9
Polic

w        |    0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.06   -0.05 |    0.64    1.81 |   -1.35   -3.13 |    1.43    3.10
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.02    0.03    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.38
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.99   -1.00 |    1.00    0.99
cs_angles |  0.0022  0.0025 |  0.0733  0.0742 | -0.9917 -0.9996 |  0.9983  0.9922
optical_flow |  0.0001  0.0001 |  0.0188  0.0181 | -0.9342 -0.9647 |  0.9549  1.0227
v_err    | -0.0117 |  0.0608 | -0.4536 |  0.0982
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -14.49 |    4.02 |  -27.22 |   -7.43
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7302   0.3405   1.4237 130.3035  38.5834  37.2821
Update Cnt = 3440    ET =   1330.7   Stats:  Mean, Std, Min, Max
r_f      |   -7.52   -3.91   -0.30 |  178.46  171.24  200.13 | -393.82 -358.57 -394.16 |  398.51  375.50  378.72
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.09    0.10    0.10
r_i      |  -36.56  -18.12  -23.24 |  696.72  633.00  771.61 |-1336.10-1267.66-1383.21 | 1359.23 1278.04 1301.27
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.13 |    0.05 |    0.02 |    0.33
norm_vf  |    0.08 |    0.01 |    0.05 |    0.13
gs_f     |    1.31 |    2.11 |    0.00 |   21.73
thrust   |   -0.00    0.00   -0.00 |    0.66    0.63    0.64 |   -3.46   -3.05   -3.40 |    3.43    3.46    3.42
norm_thrust |    0.86 |    0.71 |    0.00 |    3.46
fuel     |    1.41 |    0.15 |    1.03 |    2.08
rewards  |   -8.42 

ADVA:  (18958,) (35310,) 0.5369017275559331
ADV1:  0.0009292382505095547 0.0005283699153715726 0.006198096439677986 0.05473633131132716 -0.07081568430411
ADVB:  (23271,) (35310,) 0.6590484282073067
ADV2:  0.2694716426928763 0.41541304046026406 0.4860087897509061 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7121   0.2919   1.4800 130.3035  38.5834  37.2821
***** Episode 106948, Mean R = -9.8  Std R = 4.4  Min R = -22.5
PolicyLoss: 1.81
Policy_Entropy: 0.242
Policy_KL: 0.00753
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 3.98e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 9.57e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0022   9.0125   2.2441   3.0464
ADVA:  (19717,) (35258,) 0.5592206024164729
ADV1:  0.0009129010662146456 0.00026779584363732865 0.006818408901978264 0.05473633131132716 -0.09779487884668081
ADVB:  (22966,) (35258,) 0.6513699018662431
ADV2:  0.24337874708433294 0.37505120980233386 0.43305355054132355

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0028   9.0125   2.2441   3.0464
ADVA:  (20643,) (35292,) 0.5849200952057123
ADV1:  0.0 -0.0006882654131513969 0.006651408778087597 0.06032659837514176 -0.09081430974288207
ADVB:  (17209,) (35292,) 0.4876175903887567
ADV2:  0.0 0.2403868013672872 0.39876685045286353 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4719   0.1845   0.9626 130.3035  38.5834  37.2821
***** Episode 107196, Mean R = -10.5  Std R = 5.8  Min R = -27.5
PolicyLoss: 1.41
Policy_Entropy: 0.241
Policy_KL: 0.00895
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 3.99e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 6.05e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0022   0.0069   9.0125   2.2441   3.0464
ADVA:  (18145,) (35348,) 0.513324657689261
ADV1:  0.0008998093325566408 0.000755695633737029 0.005966

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0007   0.0026   9.0125   2.2441   3.0464
ADVA:  (20203,) (35472,) 0.5695478123590437
ADV1:  0.0010917599173539978 0.0007435055347285084 0.006673264605315131 0.062353187350712214 -0.1150135560192716
ADVB:  (22241,) (35472,) 0.6270015787099684
ADV2:  0.20577671975638423 0.39117324652774665 0.49958209068309883 3.0 0.0
 *** BROKE ***   10 0.7491135597229004
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  11.4668  15.1598  54.2829 130.3035  38.5834  37.2821
***** Episode 107444, Mean R = -9.0  Std R = 3.9  Min R = -18.3
PolicyLoss: 1.84
Policy_Entropy: 0.238
Policy_KL: 0.749
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 4e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 1.78e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0010   0.0055   9.0125   2.2441   3.0464
ADVA:  (20251,) (35393,) 0.5721752

  entropy = np.sum( - p * np.log2(p)) / logp.shape[0]
  entropy = np.sum( - p * np.log2(p)) / logp.shape[0]


 *** BROKE ***   0 1.3561184406280518
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.8166   0.0000   5.8166 130.3035  38.5834  37.2821
***** Episode 107537, Mean R = -9.7  Std R = 5.0  Min R = -21.4
PolicyLoss: 1.45
Policy_Entropy: nan
Policy_KL: 1.36
Policy_SD: 0.525
Steps: 1.17e+04
TotalSteps: 4.01e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.000136


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0006   0.0024   9.0125   2.2441   3.0464
ADVA:  (20682,) (35161,) 0.5882085264924206
ADV1:  0.001148551911846308 0.0004559340383976415 0.006701071735534186 0.03414721296190315 -0.0788428761433887
ADVB:  (22952,) (35161,) 0.6527686925855352
ADV2:  0.2410107626344601 0.3801790253147498 0.45613804432921456 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7502   0.5860   3.1921 130.3035  38.5834  37.2821
***** Episode 107568, Mean R = -9.8  Std R = 5.6  Min R = -24.2
PolicyLoss: 1.69
Policy_Entropy: 0.24
Policy_KL: 0.0163
Policy_SD: 

ADVA:  (21216,) (35190,) 0.6028985507246377
ADV1:  0.001411090632451117 0.0005410224401543403 0.007041054203153274 0.058715457240300695 -0.05971921307197686
ADVB:  (23533,) (35190,) 0.6687411196362603
ADV2:  0.25545775835690954 0.3715587348891084 0.44737858271270836 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3816   0.1311   0.6570 130.3035  38.5834  37.2821
***** Episode 107785, Mean R = -7.8  Std R = 3.4  Min R = -16.2
PolicyLoss: 1.6
Policy_Entropy: 0.241
Policy_KL: 0.00604
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 4.01e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 4.26e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0031   9.0125   2.2441   3.0464
ADVA:  (18895,) (35023,) 0.5395026125688833
ADV1:  0.0010692215881302107 0.0005213314526828197 0.006255623190637032 0.043826932338231706 -0.05971921307197686
ADVB:  (23312,) (35023,) 0.6656197356023185
ADV2:  0.2745250430523067 0.4083231350378463 0.4844689812698676 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0006   0.0032   9.0125   2.2441   3.0464
ADVA:  (20347,) (35083,) 0.5799675056295072
ADV1:  0.00025161945519442275 -0.00016250105598820407 0.005977050320877014 0.037222169926591775 -0.04553938439826333
ADVB:  (20401,) (35083,) 0.5815067126528518
ADV2:  0.13826368998828287 0.33550877095879283 0.44348313301997927 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4204   0.1504   0.7718 130.3035  38.5834  37.2821
***** Episode 108033, Mean R = -8.1  Std R = 3.6  Min R = -14.9
PolicyLoss: 1.66
Policy_Entropy: 0.241
Policy_KL: 0.00808
Policy_SD: 0.517
Steps: 1.18e+04
TotalSteps: 4.02e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000146


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0026   9.0125   2.2441   3.0464
ADVA:  (23077,) (35242,) 0.6548152772260372
ADV1:  0.00038435584363105506 -0.0005550854872920907 0.007077288156418607 0.043841753725043564 -0.05111995332385

***** Episode 108250, Mean R = -10.0  Std R = 5.7  Min R = -27.3
PolicyLoss: 2.02
Policy_Entropy: 0.241
Policy_KL: 0.01
Policy_SD: 0.53
Steps: 1.18e+04
TotalSteps: 4.03e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 1.78e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0042   0.0014   0.0075   9.0125   2.2441   3.0464
ADVA:  (19495,) (35305,) 0.5521880753434357
ADV1:  0.0 -0.0006763850245419326 0.0065704559551651035 0.059677311855402926 -0.06296165138554627
ADVB:  (19992,) (35305,) 0.5662654015012037
ADV2:  0.10828799071429213 0.309099468853181 0.43433445802265636 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5539   0.2216   0.9479 130.3035  38.5834  37.2821
***** Episode 108281, Mean R = -9.2  Std R = 5.0  Min R = -23.1
PolicyLoss: 1.57
Policy_Entropy: 0.242
Policy_KL: 0.0087
Policy_SD: 0.528
Steps: 1.18e+04
TotalSteps: 4.03e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 5.67e-05


ValFun  Gradients: u/sd/Max/C 

rewards  |   -9.12 |    4.74 |  -25.40 |   -1.52
fuel_rewards |   -4.15 |    0.46 |   -5.81 |   -3.06
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    3.12 |   13.06 |    0.00 |  279.27
norm_af  |    1.72 |    0.93 |    0.03 |    3.31
norm_wf  |    0.01 |    0.01 |    0.00 |    0.03
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |    0.06    0.02    0.05 |    1.16    0.65    1.84 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |   -0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.03    0.08 |    0.65    1.84 |   -1.54   -3.14 |    1.50    3.09
w_f      |   -0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.01 |    0.02    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_pena

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5794   0.1868   0.9318 130.3035  38.5834  37.2821
***** Episode 108839, Mean R = -10.7  Std R = 6.1  Min R = -28.1
PolicyLoss: 1.48
Policy_Entropy: 0.241
Policy_KL: 0.00593
Policy_SD: 0.532
Steps: 1.16e+04
TotalSteps: 4.05e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.000136


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0012   0.0049   9.0125   2.2441   3.0464
ADVA:  (20147,) (35205,) 0.5722766652464139
ADV1:  2.582715476245976e-05 -0.0009479836061880091 0.007691779471214769 0.07165528963852358 -0.08959969201132656
ADVB:  (20265,) (35205,) 0.5756284618662122
ADV2:  0.11306624144337164 0.2938183058357886 0.3973292194069694 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5491   0.1629   0.9195 130.3035  38.5834  37.2821
Update Cnt = 3510    ET =   1295.1   Stats:  Mean, Std, Min, Max
r_f      |  -10.33   -1.18    4.66 |  189.10  170.96  195.59 | -390.49 -364.08 -383.22 |  395.1

ADVA:  (20140,) (35480,) 0.5676437429537767
ADV1:  0.0003073751205801903 2.9119275871299766e-05 0.006450815632715539 0.07653008919035847 -0.11354464741213743
ADVB:  (20348,) (35480,) 0.5735062006764374
ADV2:  0.12222274945892211 0.33598492885350634 0.46985050691993085 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.3730   2.3317   9.4558 130.3035  38.5834  37.2821
***** Episode 109087, Mean R = -8.9  Std R = 4.6  Min R = -25.8
PolicyLoss: 1.69
Policy_Entropy: 0.241
Policy_KL: 0.00941
Policy_SD: 0.528
Steps: 1.18e+04
TotalSteps: 4.06e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 2.73e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0007   0.0033   9.0125   2.2441   3.0464
ADVA:  (19700,) (35614,) 0.5531532543381816
ADV1:  0.00032600512975594804 -0.0004193163598652698 0.006991669796085855 0.07653008919035847 -0.05804849038645521
ADVB:  (22800,) (35614,) 0.6401976750716011
ADV2:  0.2159584386602003 0.3536269081181418 0.444172668649

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0012   9.0125   2.2441   3.0464
ADVA:  (21970,) (35304,) 0.6223090867890324
ADV1:  0.0008985727792073014 -4.484908125924435e-05 0.007157854329623664 0.0405250121233075 -0.06967979248100037
ADVB:  (21770,) (35304,) 0.6166440063448901
ADV2:  0.17774188905499566 0.32117548010536856 0.4124764847600527 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3533   0.1183   0.6254 130.3035  38.5834  37.2821
***** Episode 109335, Mean R = -8.7  Std R = 4.1  Min R = -22.3
PolicyLoss: 1.5
Policy_Entropy: 0.241
Policy_KL: 0.0063
Policy_SD: 0.524
Steps: 1.17e+04
TotalSteps: 4.07e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 3.56e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0004   0.0019   9.0125   2.2441   3.0464
ADVA:  (21217,) (35316,) 0.6007758523049043
ADV1:  0.0005980669032906033 -0.00022717078669150912 0.006920065611913543 0.0405250121233075 -0.06967979248100037
ADVB:

***** Episode 109552, Mean R = -9.3  Std R = 4.6  Min R = -21.4
PolicyLoss: 1.73
Policy_Entropy: 0.241
Policy_KL: 0.00692
Policy_SD: 0.529
Steps: 1.17e+04
TotalSteps: 4.08e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.000118


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   9.0125   2.2441   3.0464
ADVA:  (19345,) (35061,) 0.5517526596503237
ADV1:  0.000695525469057821 0.00038236828305715113 0.005957860736016066 0.04747262250616441 -0.05884798542728659
ADVB:  (22327,) (35061,) 0.6368044265708337
ADV2:  0.2400125951210995 0.41666321134892104 0.5157543907936112 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8643   0.3162   1.6940 130.3035  38.5834  37.2821
***** Episode 109583, Mean R = -8.0  Std R = 4.2  Min R = -19.8
PolicyLoss: 1.88
Policy_Entropy: 0.241
Policy_KL: 0.00614
Policy_SD: 0.527
Steps: 1.18e+04
TotalSteps: 4.08e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 3.23e-05


ValFun  Gr

***** Episode 109800, Mean R = -9.5  Std R = 4.9  Min R = -21.5
PolicyLoss: 1.61
Policy_Entropy: 0.241
Policy_KL: 0.00664
Policy_SD: 0.53
Steps: 1.19e+04
TotalSteps: 4.09e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 5.22e-05


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0009   0.0047   9.0125   2.2441   3.0464
ADVA:  (19915,) (35266,) 0.5647082175466455
ADV1:  0.00042568464868063087 9.486876718493589e-05 0.006169687496146808 0.07315791597521698 -0.0751093255239727
ADVB:  (20363,) (35266,) 0.5774116712981342
ADV2:  0.13694420261561757 0.36949633794502956 0.5146207853596687 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2020   0.4924   2.2197 130.3035  38.5834  37.2821
***** Episode 109831, Mean R = -8.6  Std R = 4.6  Min R = -24.2
PolicyLoss: 1.84
Policy_Entropy: 0.24
Policy_KL: 0.00764
Policy_SD: 0.533
Steps: 1.18e+04
TotalSteps: 4.09e+

w        |   -0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.02   -0.15 |    0.66    1.80 |   -1.48   -3.12 |    1.52    3.11
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.01 |    0.02    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.50
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.97   -0.98 |    0.94    0.96
cs_angles |  0.0017  0.0012 |  0.0726  0.0757 | -0.9676 -0.9803 |  0.9381  0.9625
optical_flow | -0.0000  0.0001 |  0.0180  0.0189 | -1.1123 -0.8944 |  1.0797  1.0176
v_err    | -0.0114 |  0.0599 | -0.4532 |  0.0942
landing_rewards |    9.74 |    1.59 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.35 |    4.41 |  -42.54 |   -7.58
steps    |     380 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7943   0.2654   1.5071 130.3035  38.5834  37.2821
Update Cnt = 3560    ET =   1292.5   Stats:  Mean, Std, Min, Max
r_f      |    1.63   23.83   -2.78 |  186.24  168.57  201.02 | -390.50 -364.32 -395.83 |  378.20  374.10  393.71
v_f      |   -0.00   -0.01    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.09    0.09
r_i      |   19.32  105.85  -11.12 |  689.61  633.36  772.03 |-1323.79-1338.99-1306.11 | 1294.50 1304.07 1323.24
v_i      |   -0.00   -0.01    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.08    0.10
norm_rf  |    0.13 |    0.06 |    0.00 |    0.35
norm_vf  |    0.08 |    0.01 |    0.03 |    0.12
gs_f     |    1.31 |    1.99 |    0.00 |   17.36
thrust   |    0.00   -0.00   -0.01 |    0.66    0.66    0.66 |   -3.36   -3.36   -3.42 |    3.33    3.37    3.28
norm_thrust |    0.89 |    0.71 |    0.00 |    3.46
fuel     |    1.47 |    0.16 |    1.09 |    1.95
rewards  |   -9.17 

ADVA:  (18936,) (35106,) 0.5393949752179115
ADV1:  0.0 -7.96575076534429e-05 0.0047756497699814205 0.05306136054766203 -0.06270220245254488
ADVB:  (17324,) (35106,) 0.4934768985358628
ADV2:  0.0 0.3717717674615098 0.5934621505594309 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7067   0.2409   1.2703 130.3035  38.5834  37.2821
***** Episode 110668, Mean R = -7.8  Std R = 3.8  Min R = -18.4
PolicyLoss: 2.15
Policy_Entropy: 0.242
Policy_KL: 0.00845
Policy_SD: 0.524
Steps: 1.16e+04
TotalSteps: 4.12e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 3.37e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0042   0.0021   0.0076   9.0125   2.2441   3.0464
ADVA:  (19926,) (35191,) 0.566224318717854
ADV1:  -0.000433046484362775 -0.0014082455167330947 0.006014633409216427 0.05306136054766203 -0.06158724318443573
ADVB:  (14796,) (35191,) 0.42044841010485634
ADV2:  0.0 0.20595521704142136 0.3984828661372904 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0015   9.0125   2.2441   3.0464
ADVA:  (20620,) (34907,) 0.5907124645486579
ADV1:  0.0010383301205603192 0.0006225864328187747 0.005730420334624947 0.04496142731946723 -0.0655314758345445
ADVB:  (21565,) (34907,) 0.6177843985447045
ADV2:  0.2069069553354822 0.385087199691287 0.48576718822150217 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3625   0.1385   0.6668 130.3035  38.5834  37.2821
***** Episode 110916, Mean R = -8.5  Std R = 4.2  Min R = -21.0
PolicyLoss: 1.79
Policy_Entropy: 0.242
Policy_KL: 0.00669
Policy_SD: 0.523
Steps: 1.17e+04
TotalSteps: 4.13e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 3.49e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0024   9.0125   2.2441   3.0464
ADVA:  (20417,) (35181,) 0.5803416616923908
ADV1:  0.0006630600169258956 0.00025321783204423193 0.005609847396089378 0.04496142731946723 -0.05540798645503264
ADVB:  

***** Episode 111133, Mean R = -8.8  Std R = 5.6  Min R = -27.3
PolicyLoss: 1.57
Policy_Entropy: 0.241
Policy_KL: 0.00869
Policy_SD: 0.532
Steps: 1.19e+04
TotalSteps: 4.14e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 6.98e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0002   0.0010   9.0125   2.2441   3.0464
ADVA:  (20077,) (35518,) 0.5652626837096684
ADV1:  0.0 -0.0005636054974352412 0.006102054613090534 0.054333081762089064 -0.057471721858424085
ADVB:  (18208,) (35518,) 0.5126414775606735
ADV2:  0.021188931577062232 0.28354835371824527 0.4328258956954677 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4462   0.1398   0.6898 130.3035  38.5834  37.2821
***** Episode 111164, Mean R = -10.5  Std R = 5.9  Min R = -23.2
PolicyLoss: 1.58
Policy_Entropy: 0.242
Policy_KL: 0.0108
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 4.14e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 9.98e-05


ValFun  Gradients: u/sd

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8146   0.3722   1.6286 130.3035  38.5834  37.2821
***** Episode 111381, Mean R = -8.3  Std R = 4.5  Min R = -23.4
PolicyLoss: 1.67
Policy_Entropy: 0.241
Policy_KL: 0.00944
Policy_SD: 0.527
Steps: 1.16e+04
TotalSteps: 4.15e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 6.87e-05


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0035   9.0125   2.2441   3.0464
ADVA:  (18630,) (35152,) 0.5299840691852526
ADV1:  0.0004987343334395423 0.0001812376646139044 0.0056221690047248574 0.04336849982453311 -0.06853618309048581
ADVB:  (21775,) (35152,) 0.6194526627218935
ADV2:  0.20635455325250382 0.3740665096541869 0.48754760427946364 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6685   0.2406   1.2724 130.3035  38.5834  37.2821
***** Episode 111412, Mean R = -8.2  Std R = 3.5  Min R = -17.6
Pol

w        |   -0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.06    0.09 |    0.67    1.87 |   -1.46   -3.14 |    1.52    3.14
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.01 |    0.02    0.01    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.44
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.94   -1.00 |    0.99    0.97
cs_angles |  0.0019  0.0021 |  0.0701  0.0752 | -0.9400 -0.9968 |  0.9903  0.9695
optical_flow |  0.0000  0.0001 |  0.0173  0.0182 | -1.0651 -0.9027 |  0.8519  1.0276
v_err    | -0.0115 |  0.0608 | -0.4538 |  0.0934
landing_rewards |    9.97 |    0.57 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -14.42 |    3.76 |  -25.06 |   -8.49
steps    |     380 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  13.6318   9.5972  32.8384 130.3035  38.5834  40.2067
Update Cnt = 3610    ET =   1386.7   Stats:  Mean, Std, Min, Max
r_f      |   -4.03   14.47    1.41 |  183.09  155.27  208.37 | -398.24 -381.77 -385.87 |  394.09  366.86  377.14
v_f      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.08   -0.10 |    0.10    0.08    0.09
r_i      |  -22.55   59.61    7.38 |  672.16  617.24  794.79 |-1270.18-1357.65-1335.53 | 1339.17 1288.77 1341.43
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.10    0.09
norm_rf  |    0.13 |    0.05 |    0.01 |    0.39
norm_vf  |    0.07 |    0.01 |    0.03 |    0.12
gs_f     |    1.54 |    2.22 |    0.01 |   17.04
thrust   |    0.00    0.00    0.00 |    0.66    0.66    0.65 |   -3.17   -3.43   -3.31 |    3.44    3.11    3.32
norm_thrust |    0.89 |    0.71 |    0.00 |    3.46
fuel     |    1.46 |    0.17 |    1.13 |    2.04
rewards  |   -8.83 

ADVA:  (19407,) (35373,) 0.5486387922992113
ADV1:  0.0004087515287525198 -0.00023424733580792051 0.0065279448745725445 0.042536057886599477 -0.060181142927907015
ADVB:  (21024,) (35373,) 0.5943516241200916
ADV2:  0.17091947221629003 0.3602041290304006 0.473716205396079 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2616   0.0937   0.4527 130.3035  38.5834  40.2067
***** Episode 112218, Mean R = -10.3  Std R = 5.6  Min R = -22.2
PolicyLoss: 1.74
Policy_Entropy: 0.242
Policy_KL: 0.0054
Policy_SD: 0.531
Steps: 1.2e+04
TotalSteps: 4.18e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 8.76e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0009   0.0042   9.0125   2.2441   3.0464
ADVA:  (20255,) (35528,) 0.5701137131276739
ADV1:  0.0003876535544989146 -9.014296001816299e-05 0.006208742078298012 0.042536057886599477 -0.060181142927907015
ADVB:  (20228,) (35528,) 0.5693537491555956
ADV2:  0.13040128197700906 0.35836097737540223 0.4828200748

Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0006   0.0020   9.0125   2.2441   3.0464
ADVA:  (21154,) (35361,) 0.5982296880744323
ADV1:  0.0014470807529149273 0.0004638505725124186 0.007650618855766345 0.07652335253601295 -0.06287866782036518
ADVB:  (23313,) (35361,) 0.6592856536862646
ADV2:  0.2642546577497415 0.3849859335601403 0.43317165667377866 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4991   0.1706   0.8256 130.3035  38.5834  40.2067
***** Episode 112466, Mean R = -8.6  Std R = 4.4  Min R = -23.7
PolicyLoss: 1.68
Policy_Entropy: 0.243
Policy_KL: 0.0063
Policy_SD: 0.522
Steps: 1.17e+04
TotalSteps: 4.19e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 4.38e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0005   0.0017   9.0125   2.2441   3.0464
ADVA:  (19821,) (35107,) 0.5645882587518158
ADV1:  0.00090054142900535

***** Episode 112683, Mean R = -8.5  Std R = 5.1  Min R = -21.0
PolicyLoss: 0.941
Policy_Entropy: 0.242
Policy_KL: 0.0094
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 4.2e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 5.32e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0005   0.0016   9.0125   2.2441   3.0464
ADVA:  (23772,) (34931,) 0.680541639231628
ADV1:  0.0010896612863643493 -0.0006400841563075874 0.03931793801947812 0.11203722049792611 -2.1537851581414866
ADVB:  (17560,) (34931,) 0.5027053333715038
ADV2:  0.0020325395519987252 0.17513819851920395 0.33261493043803775 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3270   0.1304   0.6320 130.3035  38.5834  40.2067
***** Episode 112714, Mean R = -10.2  Std R = 5.5  Min R = -25.4
PolicyLoss: 0.994
Policy_Entropy: 0.243
Policy_KL: 0.0114
Policy_SD: 0.528
Steps: 1.17e+04
TotalSteps: 4.2e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 6.9e-05


Dynamics: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8181   0.2788   1.3249 130.3035  38.5834  40.2067
***** Episode 112931, Mean R = -9.6  Std R = 4.9  Min R = -21.8
PolicyLoss: 1.76
Policy_Entropy: 0.243
Policy_KL: 0.00951
Policy_SD: 0.531
Steps: 1.18e+04
TotalSteps: 4.21e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 4.45e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0004   9.0125   2.2441   3.0464
ADVA:  (19948,) (35293,) 0.5652112316890034
ADV1:  5.777780246156439e-05 -0.0002071117466254042 0.006307435281322033 0.05199291278251894 -0.08278392599151713
ADVB:  (19413,) (35293,) 0.5500524183265804
ADV2:  0.08181774248999119 0.305864997004381 0.45154135260066885 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5852   0.1954   0.9534 130.3035  38.5834  40.2067
***** Episode 112962, Mean R = -8.2  Std R = 4.3  Min R = -19.1
PolicyLoss: 1.58
Policy_Entropy: 0.243
Policy_KL: 0.00913
Policy_SD: 0.526
Steps: 1.17e+04
TotalS

attitude |    0.12   -0.03   -0.34 |    1.23    0.65    1.85 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |   -0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.02   -0.35 |    0.65    1.86 |   -1.50   -3.13 |    1.54    3.13
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.02    0.03    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.24 |    0.00 |    1.48
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.98   -0.97 |    0.97    1.00
cs_angles |  0.0014  0.0029 |  0.0719  0.0713 | -0.9801 -0.9740 |  0.9697  0.9980
optical_flow |  0.0001  0.0001 |  0.0187  0.0191 | -1.1795 -0.9796 |  1.0534  0.9895
v_err    | -0.0112 |  0.0610 | -0.4537 |  0.1045
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (18882,) (35432,) 0.5329081056671935
ADV1:  0.00030199465566313043 0.0001251512481065635 0.005874213256814657 0.047991128740746986 -0.10272522283688323
ADVB:  (20912,) (35432,) 0.5902009482953262
ADV2:  0.16644305121574549 0.3884438148604548 0.5201087554882251 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9642   0.8477   3.6087 130.3035  38.5834  40.2067
Update Cnt = 3660    ET =   1357.9   Stats:  Mean, Std, Min, Max
r_f      |    4.73   -6.33   -5.38 |  187.69  176.26  200.44 | -384.61 -388.93 -390.70 |  398.96  373.39  396.21
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.09
r_i      |    6.69  -29.75   -5.84 |  680.80  661.16  768.54 |-1234.21-1324.63-1305.79 | 1245.58 1294.07 1337.23
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.13 |    0.06 |    0.01 |    0.40
norm_vf  |    0.08 |    0.01 |    0.04 |    0.11
gs_f  

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0011   0.0057   9.0125   2.2441   3.0464
ADVA:  (19401,) (35492,) 0.5466302265299222
ADV1:  0.00039168609466483026 -0.0002231592865965727 0.006889749855077524 0.048445536747615425 -0.10516609035356556
ADVB:  (20806,) (35492,) 0.5862166121942973
ADV2:  0.1506046458483818 0.3388704914749115 0.45412252794472746 3.0 0.0
 *** BROKE ***   6 0.5088754892349243
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  10.4197   8.2038  23.1130 130.3035  38.5834  40.2067
***** Episode 113768, Mean R = -8.8  Std R = 4.9  Min R = -25.4
PolicyLoss: 1.71
Policy_Entropy: 0.239
Policy_KL: 0.509
Policy_SD: 0.533
Steps: 1.19e+04
TotalSteps: 4.24e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 2.19e-05


Dynamics: Max Disturbance (m/s^2):  [0.00143928 0.00149071 0.00151695] 0.0025680528336302776
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0011   0.0058   9.0125   2.2441   3.0464
ADVA:  (20788,) (35420,) 0.586

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.0638   0.8074   3.6042 130.3035  38.5834  40.2067
***** Episode 113985, Mean R = -9.4  Std R = 5.5  Min R = -21.4
PolicyLoss: 2.19
Policy_Entropy: 0.243
Policy_KL: 0.0086
Policy_SD: 0.52
Steps: 1.17e+04
TotalSteps: 4.25e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 1.67e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0005   0.0026   9.0125   2.2441   3.0464
ADVA:  (19568,) (35153,) 0.5566523483059768
ADV1:  0.00012325077234969992 -9.115349779064895e-05 0.005053244273625512 0.0756800333505368 -0.06455680396055374
ADVB:  (19060,) (35153,) 0.542201234603021
ADV2:  0.08767790890693018 0.3467626066470128 0.49551273762507514 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2766   0.6468   2.7819 130.3035  38.5834  40.2067
***** Episode 114016, Mean R = -9.3  Std R = 4.7  Min R = -20.1
PolicyLoss: 1.82
Policy_Entropy: 0.243
Policy_KL: 0.00925
Policy_SD: 0.525
Steps: 1.16e+04
TotalSte

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7359   0.3204   1.4445 130.3035  38.5834  40.2067
***** Episode 114233, Mean R = -9.3  Std R = 6.1  Min R = -28.4
PolicyLoss: 1.35
Policy_Entropy: 0.243
Policy_KL: 0.00589
Policy_SD: 0.531
Steps: 1.17e+04
TotalSteps: 4.26e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000127


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0005   0.0023   9.0125   2.2441   3.0464
ADVA:  (21947,) (35182,) 0.6238133136262861
ADV1:  0.0006365175030586751 -0.00020791389303416753 0.007280347203231303 0.033949630737934944 -0.052436027533081075
ADVB:  (21149,) (35182,) 0.6011312603035643
ADV2:  0.16088153950339018 0.3237294979464269 0.4097321708562131 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8326   0.3656   1.6954 130.3035  38.5834  40.2067
***** Episode 114264, Mean R = -8.2  Std R = 3.8  Min R = -19.6
PolicyLoss: 1.54
Policy_Entropy: 0.243
Policy_KL: 0.00574
Policy_SD: 0.524
Steps: 1.18e+04
Tot

ADVA:  (20286,) (35278,) 0.5750325982198538
ADV1:  0.0005724840740386119 0.00013597675481634764 0.005361309607098047 0.06322837019093264 -0.07399637346955479
ADVB:  (20698,) (35278,) 0.5867112648109303
ADV2:  0.1545633038824738 0.3510943860547677 0.47679565423695897 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9694   0.4395   2.0214 130.3035  38.5834  40.2067
***** Episode 114481, Mean R = -9.3  Std R = 4.5  Min R = -18.1
PolicyLoss: 1.71
Policy_Entropy: 0.243
Policy_KL: 0.00912
Policy_SD: 0.528
Steps: 1.19e+04
TotalSteps: 4.27e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 1.37e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0009   0.0047   9.0125   2.2441   3.0464
ADVA:  (21610,) (35368,) 0.6110042976702104
ADV1:  0.0 -0.0006523738978115681 0.006393255584037952 0.06322837019093264 -0.07721767338923322
ADVB:  (18022,) (35368,) 0.509556661388826
ADV2:  0.014243260974021876 0.2373548006871693 0.3806725986880725 3.0 0.0
Policy 

attitude |   -0.08    0.05   -0.01 |    1.24    0.67    1.91 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.06   -0.02 |    0.67    1.90 |   -1.51   -3.14 |    1.44    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.02    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.39
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.99   -0.87 |    1.00    0.99
cs_angles |  0.0028  0.0037 |  0.0749  0.0736 | -0.9917 -0.8704 |  0.9967  0.9861
optical_flow | -0.0000  0.0002 |  0.0177  0.0192 | -1.0857 -0.9635 |  0.9734  1.0166
v_err    | -0.0112 |  0.0606 | -0.4534 |  0.0930
landing_rewards |    9.81 |    1.38 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  10.4715   4.5879  19.8571 130.3035  38.5834  40.2067
Update Cnt = 3710    ET =   1389.8   Stats:  Mean, Std, Min, Max
r_f      |   12.07   -9.97   -4.33 |  189.26  175.31  197.17 | -380.12 -386.00 -397.46 |  392.19  357.60  384.14
v_f      |   -0.00    0.00    0.00 |    0.04    0.04    0.04 |   -0.08   -0.10   -0.10 |    0.09    0.09    0.08
r_i      |   57.49  -24.05  -16.69 |  689.09  664.73  754.27 |-1244.52-1277.79-1343.85 | 1335.11 1298.15 1335.69
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.45
norm_vf  |    0.07 |    0.01 |    0.04 |    0.12
gs_f     |    1.20 |    1.77 |    0.01 |   20.78
thrust   |   -0.00    0.00   -0.00 |    0.65    0.65    0.64 |   -3.45   -3.04   -3.34 |    3.37    3.36    3.31
norm_thrust |    0.87 |    0.71 |    0.00 |    3.46
fuel     |    1.42 |    0.15 |    1.07 |    1.94
rewards  |   -8.25 

ADVA:  (23392,) (34969,) 0.6689353427321342
ADV1:  0.0005535142200472909 -7.83420326521694e-05 0.007661652788806747 0.044019716965956274 -0.055374246227648916
ADVB:  (18096,) (34969,) 0.517486916983614
ADV2:  0.025546599386956997 0.2671358125673395 0.4008020337966018 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6975   0.2923   1.3343 130.3035  38.5834  40.2067
***** Episode 115318, Mean R = -9.7  Std R = 4.9  Min R = -21.0
PolicyLoss: 1.48
Policy_Entropy: 0.243
Policy_KL: 0.00631
Policy_SD: 0.529
Steps: 1.17e+04
TotalSteps: 4.3e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 5.45e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0001   0.0008   9.0125   2.2441   3.0464
ADVA:  (20531,) (35120,) 0.5845956719817768
ADV1:  0.0013445616539009171 0.000797719420074765 0.0066029084801793704 0.044019716965956274 -0.055374246227648916
ADVB:  (22434,) (35120,) 0.6387813211845103
ADV2:  0.2325440321589679 0.3924912743551746 0.48182862242751

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0004   0.0016   9.0125   2.2441   3.0464
ADVA:  (19743,) (35470,) 0.5566112207499295
ADV1:  0.0 -0.0002014841222799682 0.005130895558496985 0.05006012219516598 -0.10168906906406627
ADVB:  (17723,) (35470,) 0.49966168593177335
ADV2:  0.0 0.31677289149480264 0.501665490618303 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5361   0.2029   0.9484 130.3035  38.5834  40.2067
***** Episode 115566, Mean R = -9.8  Std R = 4.3  Min R = -20.8
PolicyLoss: 1.8
Policy_Entropy: 0.243
Policy_KL: 0.00728
Policy_SD: 0.533
Steps: 1.19e+04
TotalSteps: 4.31e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 1.48e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   9.0125   2.2441   3.0464
ADVA:  (18938,) (35486,) 0.53367525221214
ADV1:  0.0 -0.0002755911932687157 0.005327019357844627 0.05006012219516598 -0.10168906906406627
ADVB:  (18250,) (35486,) 0.5142873245787071
ADV2:  0.0238

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0032   9.0125   2.2441   3.0464
ADVA:  (18821,) (35438,) 0.5310965630114566
ADV1:  0.0006006578638379414 0.0003640839778185029 0.005617544182476633 0.04186228842302059 -0.05673438371748335
ADVB:  (22042,) (35438,) 0.6219876968226198
ADV2:  0.2250724509382062 0.4213543394446972 0.5245953878030144 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7499   0.3511   1.4470 130.3035  38.5834  40.2067
***** Episode 115814, Mean R = -8.2  Std R = 3.5  Min R = -16.8
PolicyLoss: 1.94
Policy_Entropy: 0.243
Policy_KL: 0.00595
Policy_SD: 0.53
Steps: 1.18e+04
TotalSteps: 4.32e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 1.69e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0009   0.0041   9.0125   2.2441   3.0464
ADVA:  (20029,) (35573,) 0.563039383802322
ADV1:  0.00025839271523054284 -4.955197529015933e-06 0.00567518895562948 0.04186228842302059 -0.07563569515943527
ADVB:  (

***** Episode 116031, Mean R = -10.0  Std R = 5.4  Min R = -29.4
PolicyLoss: 1.62
Policy_Entropy: 0.244
Policy_KL: 0.00715
Policy_SD: 0.523
Steps: 1.18e+04
TotalSteps: 4.33e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 3.45e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0011   9.0125   2.2441   3.0464
ADVA:  (19631,) (35400,) 0.55454802259887
ADV1:  0.0007849705560910901 0.00041538884363755374 0.005586737548252054 0.040540335257984506 -0.05374583198247673
ADVB:  (21450,) (35400,) 0.6059322033898306
ADV2:  0.21785773633931216 0.41813361837997237 0.5191815720500496 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5496   0.2056   0.9727 130.3035  38.5834  40.2067
***** Episode 116062, Mean R = -8.1  Std R = 4.3  Min R = -20.8
PolicyLoss: 1.97
Policy_Entropy: 0.244
Policy_KL: 0.00594
Policy_SD: 0.517
Steps: 1.18e+04
TotalSteps: 4.33e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 2.8e-05


ValFun  G

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.03   -0.00 |    0.63    1.82 |   -1.50   -3.14 |    1.55    3.12
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.02    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.46
seeker_angles |    0.00    0.01 |    0.07    0.07 |   -0.96   -0.94 |    0.96    0.96
cs_angles |  0.0019  0.0057 |  0.0722  0.0740 | -0.9563 -0.9391 |  0.9600  0.9594
optical_flow | -0.0000  0.0000 |  0.0189  0.0192 | -0.9517 -0.9990 |  0.9371  1.0153
v_err    | -0.0117 |  0.0611 | -0.4528 |  0.0990
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -14.36 |    4.14 |  -33.89 |   -7.59
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5984   0.8669   3.1447 130.3035  38.5834  40.2067
Update Cnt = 3760    ET =   1098.3   Stats:  Mean, Std, Min, Max
r_f      |   -7.75    2.24    7.43 |  174.67  178.90  200.75 | -374.71 -392.76 -391.65 |  393.20  381.62  389.79
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.12   -0.09 |    0.09    0.09    0.09
r_i      |  -22.83  -17.20   22.51 |  639.70  677.09  787.29 |-1272.18-1291.44-1361.29 | 1203.50 1301.34 1343.10
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.10    0.09
norm_rf  |    0.13 |    0.06 |    0.01 |    0.37
norm_vf  |    0.07 |    0.01 |    0.04 |    0.13
gs_f     |    1.36 |    2.32 |    0.00 |   30.79
thrust   |    0.00   -0.00    0.00 |    0.65    0.64    0.65 |   -3.46   -3.39   -3.43 |    3.25    3.37    3.44
norm_thrust |    0.87 |    0.72 |    0.00 |    3.46
fuel     |    1.43 |    0.16 |    1.01 |    2.04
rewards  |   -8.28 

ADVA:  (21116,) (35421,) 0.596143530673894
ADV1:  0.0 -0.0004652931822702264 0.0060066930558051205 0.05498586092794855 -0.05588158709430274
ADVB:  (18567,) (35421,) 0.5241805708478021
ADV2:  0.03700881707010631 0.2686480032191526 0.409893457645642 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5553   0.2219   1.1104 130.3035  38.5834  40.2067
***** Episode 116868, Mean R = -8.6  Std R = 5.8  Min R = -23.5
PolicyLoss: 1.46
Policy_Entropy: 0.244
Policy_KL: 0.0059
Policy_SD: 0.525
Steps: 1.17e+04
TotalSteps: 4.36e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 3.5e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0023   9.0125   2.2441   3.0464
ADVA:  (21254,) (35456,) 0.599447202166065
ADV1:  0.00023839919539392178 -0.0002555121275930316 0.006455619059415092 0.05498586092794855 -0.13110492809428104
ADVB:  (19290,) (35456,) 0.5440546028880866
ADV2:  0.06897005086591712 0.27495217327606947 0.40228174961068736 3.0 0.0
Policy  

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0012   9.0125   2.2441   3.0464
ADVA:  (20564,) (35191,) 0.5843539541360007
ADV1:  0.0004770564076903589 -4.641583739080294e-05 0.006462298372927671 0.05328809869415685 -0.06864500142125501
ADVB:  (21250,) (35191,) 0.6038475746639765
ADV2:  0.14748916946914256 0.31925866218655435 0.4244720877578703 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5550   0.1828   0.9030 130.3035  38.5834  40.2067
***** Episode 117116, Mean R = -7.7  Std R = 3.7  Min R = -17.8
PolicyLoss: 1.5
Policy_Entropy: 0.244
Policy_KL: 0.0064
Policy_SD: 0.524
Steps: 1.17e+04
TotalSteps: 4.37e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 5.29e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0002   0.0013   9.0125   2.2441   3.0464
ADVA:  (20246,) (35148,) 0.5760213952429726
ADV1:  0.00032296342950869587 -0.00024364398431138371 0.006763952829822386 0.05328809869415685 -0.06864500142125501
AD

***** Episode 117333, Mean R = -9.4  Std R = 4.0  Min R = -20.3
PolicyLoss: 1.79
Policy_Entropy: 0.245
Policy_KL: 0.00676
Policy_SD: 0.521
Steps: 1.2e+04
TotalSteps: 4.38e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 1.6e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0022   9.0125   2.2441   3.0464
ADVA:  (22041,) (35535,) 0.6202617138032925
ADV1:  0.0002747770818315792 -0.00025907794409144334 0.006115692140242252 0.038561857520473364 -0.04657526871096507
ADVB:  (18760,) (35535,) 0.5279302096524553
ADV2:  0.042252430002908466 0.259511941841393 0.39457359277861825 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6415   0.2715   1.2324 130.3035  38.5834  40.2067
***** Episode 117364, Mean R = -9.1  Std R = 5.2  Min R = -23.2
PolicyLoss: 1.39
Policy_Entropy: 0.245
Policy_KL: 0.00753
Policy_SD: 0.523
Steps: 1.19e+04
TotalSteps: 4.38e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 7.39e-05


ValFun  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6928   0.2450   1.3132 130.3035  38.5834  40.2067
***** Episode 117581, Mean R = -8.7  Std R = 6.1  Min R = -31.5
PolicyLoss: 1.72
Policy_Entropy: 0.244
Policy_KL: 0.00951
Policy_SD: 0.521
Steps: 1.19e+04
TotalSteps: 4.39e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 3.45e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0005   0.0017   9.0125   2.2441   3.0464
ADVA:  (21768,) (35499,) 0.6132003718414604
ADV1:  0.0006343693471008317 -7.734571764084157e-05 0.006251426034064899 0.04896555567254743 -0.052529464784939535
ADVB:  (20304,) (35499,) 0.5719597735147469
ADV2:  0.11099661809827026 0.2946889877222957 0.42076931135461876 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5184   0.1832   0.9857 130.3035  38.5834  40.2067
***** Episode 117612, Mean R = -8.9  Std R = 4.4  Min R = -19.9
PolicyLoss: 1.46
Policy_Entropy: 0.245
Policy_KL: 0.00883
Policy_SD: 0.512
Steps: 1.17e+04
Tota

w        |    0.00    0.00   -0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.04    0.17 |    0.62    1.82 |   -1.43   -3.14 |    1.46    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.02    0.03    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.50
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -1.00   -1.00 |    0.99    1.00
cs_angles |  0.0016  0.0014 |  0.0723  0.0764 | -0.9991 -0.9976 |  0.9885  0.9957
optical_flow | -0.0000  0.0000 |  0.0184  0.0194 | -1.1002 -0.9889 |  1.0891  0.9765
v_err    | -0.0115 |  0.0603 | -0.4535 |  0.0984
landing_rewards |    9.68 |    1.77 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -14.41 |    4.18 |  -34.91 |   -7.72
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.2511   3.8252  15.0015 130.3035  38.5834  40.2067
Update Cnt = 3810    ET =   1094.7   Stats:  Mean, Std, Min, Max
r_f      |   15.49   13.33   11.48 |  184.23  176.16  204.21 | -398.20 -391.11 -392.85 |  378.19  372.64  394.26
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.08    0.10    0.10
r_i      |   31.50   30.95   11.90 |  666.01  677.58  773.14 |-1266.98-1349.33-1328.01 | 1296.54 1284.29 1342.45
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.09    0.09
norm_rf  |    0.14 |    0.05 |    0.02 |    0.30
norm_vf  |    0.07 |    0.01 |    0.04 |    0.12
gs_f     |    1.33 |    2.61 |    0.00 |   38.47
thrust   |    0.00   -0.00    0.00 |    0.67    0.67    0.66 |   -3.38   -3.33   -3.44 |    3.45    3.43    3.46
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.46 |    0.19 |    0.91 |    2.36
rewards  |   -9.64 

ADVA:  (19673,) (35011,) 0.5619091142783696
ADV1:  0.0006611010402373628 0.00017805413020174544 0.006700314555900598 0.05743999660457372 -0.08350422973244903
ADVB:  (21356,) (35011,) 0.6099797206592213
ADV2:  0.19940202176987284 0.3789122262139668 0.49263045665325284 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7971   0.2950   1.5237 130.3035  38.5834  40.2067
***** Episode 118418, Mean R = -9.2  Std R = 4.4  Min R = -19.4
PolicyLoss: 1.77
Policy_Entropy: 0.245
Policy_KL: 0.00818
Policy_SD: 0.523
Steps: 1.17e+04
TotalSteps: 4.42e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 5.35e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0005   9.0125   2.2441   3.0464
ADVA:  (18670,) (35239,) 0.5298107210760805
ADV1:  0.0007684144448994647 0.00019300054041391357 0.006887060986486538 0.05743999660457372 -0.08350422973244903
ADVB:  (22702,) (35239,) 0.6442294049206845
ADV2:  0.2617012431455268 0.42073952446832236 0.5069497970731

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0011   0.0040   9.0125   2.2441   3.0464
ADVA:  (20290,) (34862,) 0.5820090643107108
ADV1:  0.0 -0.0007451057876701707 0.006141804369305369 0.04460404998156864 -0.06115960262462583
ADVB:  (17761,) (34862,) 0.5094658940967243
ADV2:  0.013226379790440675 0.2574426654685049 0.41015347905128513 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5335   0.2067   0.9593 130.3035  38.5834  40.2067
***** Episode 118666, Mean R = -10.1  Std R = 5.5  Min R = -24.6
PolicyLoss: 1.43
Policy_Entropy: 0.245
Policy_KL: 0.0128
Policy_SD: 0.519
Steps: 1.18e+04
TotalSteps: 4.43e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.0001


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0006   0.0030   9.0125   2.2441   3.0464
ADVA:  (21924,) (35026,) 0.6259350196996517
ADV1:  0.0006037138873080036 -0.00011825720150473446 0.006691216284701842 0.038377851865394 -0.05267818918023008
ADVB:  (20522,) (35026

***** Episode 118883, Mean R = -8.3  Std R = 4.8  Min R = -19.7
PolicyLoss: 1.5
Policy_Entropy: 0.244
Policy_KL: 0.00884
Policy_SD: 0.521
Steps: 1.17e+04
TotalSteps: 4.44e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 6.34e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0002   0.0011   9.0125   2.2441   3.0464
ADVA:  (19770,) (35067,) 0.5637779108563606
ADV1:  0.0009865802234677876 0.0004350258264577624 0.006320113731062291 0.030546448749940835 -0.04517485216017543
ADVB:  (22730,) (35067,) 0.6481877548692503
ADV2:  0.25581244986544166 0.39622709024148545 0.4662668306843756 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3105   0.1213   0.6020 130.3035  38.5834  40.2067
***** Episode 118914, Mean R = -7.9  Std R = 5.3  Min R = -22.5
PolicyLoss: 1.74
Policy_Entropy: 0.245
Policy_KL: 0.00721
Policy_SD: 0.517
Steps: 1.19e+04
TotalSteps: 4.44e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 3.85e-05


ValFun  G

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9912   0.4419   1.9064 130.3035  38.5834  40.2067
***** Episode 119131, Mean R = -10.2  Std R = 6.5  Min R = -26.4
PolicyLoss: 1.79
Policy_Entropy: 0.245
Policy_KL: 0.00839
Policy_SD: 0.526
Steps: 1.18e+04
TotalSteps: 4.44e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 4.98e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0016   9.0125   2.2441   3.0464
ADVA:  (19082,) (35192,) 0.542225505796772
ADV1:  0.0002035605079807063 9.073301671376827e-05 0.005075361309049687 0.06805269685213128 -0.053628736851104364
ADVB:  (20197,) (35192,) 0.5739088429188451
ADV2:  0.13259946384353538 0.37438302860180955 0.5230357673803862 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5733   0.1983   1.0597 130.3035  38.5834  40.2067
***** Episode 119162, Mean R = -7.0  Std R = 3.1  Min R = -14.2
PolicyLoss: 1.86
Policy_Entropy: 0.245
Policy_KL: 0.00816
Policy_SD: 0.518
Steps: 1.17e+04
Total

w        |    0.00    0.00   -0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.04    0.05 |    0.66    1.84 |   -1.47   -3.14 |    1.48    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.01   -0.02 |    0.03    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.50
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.98   -0.98 |    0.98    0.97
cs_angles |  0.0025  0.0022 |  0.0734  0.0752 | -0.9757 -0.9828 |  0.9849  0.9750
optical_flow |  0.0001  0.0001 |  0.0195  0.0187 | -1.1802 -1.0935 |  1.0962  1.1149
v_err    | -0.0112 |  0.0602 | -0.4548 |  0.0964
landing_rewards |    9.84 |    1.26 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.05 |    0.01
tracking_rewards |  -14.51 |    3.96 |  -30.22 |   -7.74
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6037   0.6628   3.2849 130.3035  38.5834  40.2067
Update Cnt = 3860    ET =   1080.2   Stats:  Mean, Std, Min, Max
r_f      |  -16.75   -4.57    6.39 |  192.52  171.97  194.39 | -391.31 -373.60 -397.68 |  386.87  383.71  370.71
v_f      |    0.00   -0.00   -0.00 |    0.04    0.04    0.04 |   -0.10   -0.09   -0.10 |    0.09    0.09    0.09
r_i      |  -37.26    4.71   24.14 |  710.87  673.58  731.33 |-1290.88-1273.98-1383.35 | 1341.87 1270.03 1282.68
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.10    0.10
norm_rf  |    0.13 |    0.05 |    0.04 |    0.30
norm_vf  |    0.07 |    0.01 |    0.04 |    0.10
gs_f     |    1.12 |    1.68 |    0.00 |   16.37
thrust   |    0.00    0.00   -0.00 |    0.66    0.66    0.66 |   -3.23   -3.43   -3.45 |    3.35    3.41    3.39
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.43 |    0.16 |    0.97 |    2.03
rewards  |   -8.99 

ADVA:  (20298,) (34941,) 0.5809221258693226
ADV1:  0.0006902454902959964 0.00033656479179781565 0.006445400641028081 0.05775194385191729 -0.06451042471047025
ADVB:  (20895,) (34941,) 0.598008070747832
ADV2:  0.16653957903285396 0.3798233073827516 0.5162629447073094 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9428   0.3774   1.7291 130.3035  38.5834  40.2067
***** Episode 119968, Mean R = -9.3  Std R = 4.9  Min R = -24.5
PolicyLoss: 1.8
Policy_Entropy: 0.245
Policy_KL: 0.00771
Policy_SD: 0.517
Steps: 1.15e+04
TotalSteps: 4.48e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 1.74e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0011   0.0054   9.0125   2.2441   3.0464
ADVA:  (18897,) (35172,) 0.5372739679290345
ADV1:  0.00038924863808705284 0.0002037715217693431 0.006590199849690591 0.05775194385191729 -0.06451042471047025
ADVB:  (21782,) (35172,) 0.619299442738542
ADV2:  0.2057626822289811 0.41205884549642685 0.5433963713085892 

In [4]:
# Name under which this run's policy parameters are saved; kept as a
# notebook-level variable so it stays available after this cell runs.
fname = "optimize_WATTVW_FOV-AR=5-AWR-RPT2"
# Hand the name to the policy object's save_params method.
# NOTE(review): presumably this writes the trained policy weights to disk for
# later reloading -- the storage location and file format are decided inside
# save_params, which is not visible here; confirm against its implementation.
policy.save_params(fname)
