# Test Recurrent Policy with Extreme Parameter Variation 

In [1]:
import numpy as np
import os,sys



sys.path.append('../../../RL_lib/Agents')
sys.path.append('../../../RL_lib/Policies/AWR')
sys.path.append('../../../RL_lib/Policies/Common')
sys.path.append('../../../RL_lib/Utils')
sys.path.append('../../../Env')
sys.path.append('../../../Imaging')


%load_ext autoreload
%load_ext autoreload
%autoreload 2
%matplotlib nbagg
import os
print(os.getcwd())

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/Users/briangaudet/Study/Subjects/MachineLearning/Projects/Asteroid_CPO_seeker-master/Experiments/Extended/Optimize_HF


In [2]:
%%html
<style>
/* Let output areas size to their content, capped at max-height. */
.output_wrapper, .output {
    height:auto !important;
    max-height:1000px;  /* your desired max-height here */
}
/* Remove the box shadow from elements with the output_scroll class.
   The vendor-prefixed property needs its leading hyphen to be valid CSS. */
.output_scroll {
    -webkit-box-shadow:none !important;
    box-shadow:none !important;
}
</style>

# Optimize Policy

In [5]:
from env import Env
import env_utils as envu
from dynamics_model import Dynamics_model
from lander_model import Lander_model
from ic_gen import Landing_icgen
import rl_utils
import attitude_utils as attu
import optics_utils as optu
from arch_policy_vf_vfu import Arch

from policy_awr import Policy
from softmax_pd import Softmax_pd as PD
from value_function import Value_function

import policy_nets as policy_nets
import valfunc_nets as valfunc_nets


from agent import Agent


import torch.nn as nn

from flat_constraint import Flat_constraint
from glideslope_constraint import Glideslope_constraint
from rh_constraint import RH_constraint
from no_attitude_constraint import Attitude_constraint
from w_constraint import W_constraint
from reward_attitude import Reward
from asteroid_hfr import Asteroid

from thruster_model_cubesat import Thruster_model

from sensor import Sensor
from seeker import Seeker

# --- Asteroid, seeker/sensor, dynamics, thruster and lander model setup ---

# landing_site_range is forwarded unchanged to Sensor, Lander_model and Env.
landing_site_range = 0.0
# No landing-site override is passed to the asteroid model.
landing_site = None # alternative override value: np.asarray([-250.,0.,0.])

# NOTE(review): omega_range presumably bounds the asteroid's rotation rate -- confirm in asteroid_hfr.
asteroid_model = Asteroid(landing_site_override=landing_site, omega_range=(1e-5,5e-4))

# One attitude parameterization object is shared by the seeker, sensor,
# lander model, attitude constraint and initial-condition generator.
ap = attu.Quaternion_attitude()

# NOTE(review): C_cb / r_cb look like the seeker's rotation and position offset
# relative to the body frame -- confirm against Seeker.
C_cb = optu.rotate_optical_axis(0.0, 0.0, np.pi)
r_cb = np.asarray([0,0,0])
# 90 passed through envu.deg2rad before being handed to the seeker as its fov.
fov=envu.deg2rad(90)
seeker = Seeker(attitude_parameterization=ap, C_cb=C_cb, r_cb=r_cb,
                radome_slope_bounds=(-0.05,0.05), range_bias=(-0.05,0.05),
                   fov=fov, debug=False)
sensor = Sensor(seeker, attitude_parameterization=ap,  use_range=True, apf_tau1=300, use_dp=False,
                      landing_site_range=landing_site_range,
                      pool_type='max', state_type=Sensor.optflow_state_range_dp1)
# Print the sensor's tracking function before and after rebinding it to
# track_func1. In the output saved with this notebook both prints show
# track_func1, so the assignment appears to restate the default.
print(sensor.track_func)
sensor.track_func = sensor.track_func1
print(sensor.track_func)
# This logger instance is shared by the environment and the agent below.
logger = rl_utils.Logger()
# NOTE(review): h is presumably the integration step size -- confirm in dynamics_model.
dynamics_model = Dynamics_model(h=2)
thruster_model = Thruster_model(pulsed=True, scale=1.0, offset=0.4)
lander_model = Lander_model(asteroid_model, thruster_model, attitude_parameterization=ap, sensor=sensor, 
                             landing_site_range=landing_site_range, com_range=(-0.10,0.10),
                              attitude_bias=0.05, omega_bias=0.05)

# Rebind get_state_agent to the sensor_att_w2 variant provided by the lander model.
# NOTE(review): presumably this selects the observation vector the agent receives -- confirm.
lander_model.get_state_agent = lander_model.get_state_agent_sensor_att_w2

# --- Network dimensions ---

# Input size of both the policy and value-function networks.
obs_dim = 13
# The policy network emits one logit per (action dimension, discrete choice)
# pair, i.e. 12 * 2 = 24 outputs.
action_dim = 12
actions_per_dim = 2
logit_dim = action_dim * actions_per_dim

# Passed as recurrent_steps to both GRU networks and to the agent.
recurrent_steps = 60

# --- Reward and constraints ---

# Every coefficient here is negative except landing_coeff.
# NOTE(review): the exact meaning of each limit/coefficient is defined in reward_attitude -- confirm there.
reward_object = Reward(landing_rlimit=2, landing_vlimit=0.1, 
                       tracking_bias=0.01, fov_coeff=-50., 
                       att_coeff=-0.20,
                       tracking_coeff=-0.5, magv_coeff=-1.0,
                       fuel_coeff=-0.10,  landing_coeff=10.0)

glideslope_constraint = Glideslope_constraint(gs_limit=-1.0)
# NOTE(review): shape_constraint is constructed here but is not passed to Env
# and is not referenced again in this cell -- confirm whether it is still needed.
shape_constraint = Flat_constraint()
# Attitude_constraint is imported from the no_attitude_constraint module in this notebook.
attitude_constraint = Attitude_constraint(ap)
# The same limit and margin values are used for all three components.
w_constraint = W_constraint(w_limit=(0.1,0.1,0.1), w_margin=(0.05,0.05,0.05))
rh_constraint = RH_constraint(rh_limit=150)

# --- Initial-condition generator ---

# Symmetric bound: lander_wll is (-wi, -wi, -wi) and lander_wul is (wi, wi, wi).
wi=0.05
# NOTE(review): the first positional argument (800,1000) and the (low, high)
# style tuples are presumably sampling ranges for each episode's initial
# conditions -- confirm units and meaning in ic_gen.
ic_gen = Landing_icgen((800,1000), 
                           p_engine_fail=0.5,
                           engine_fail_scale=(0.5,1.0),
                           lander_wll=(-wi,-wi,-wi),
                           lander_wul=(wi,wi,wi),
                           attitude_parameterization=ap,
                           position_error=(0,np.pi/4),
                           heading_error=(0,np.pi/8),
                           attitude_error=(0,np.pi/16),
                           min_mass=450, max_mass=500,
                           mag_v=(0.05,0.1),
                           debug=False,
                           inertia_uncertainty_diag=10.0,
                           inertia_uncertainty_offdiag=1.0)

# --- Environment ---

# Assemble the environment from the initial-condition generator, lander model,
# dynamics model, logger, reward object and constraints built above.
# NOTE(review): tf_limit, print_every and nav_period semantics are defined in env -- confirm there.
env = Env(ic_gen, lander_model, dynamics_model, logger,
          landing_site_range=landing_site_range,
          debug_done=False,
          reward_object=reward_object,
          glideslope_constraint=glideslope_constraint,
          attitude_constraint=attitude_constraint,
          w_constraint=w_constraint,
          rh_constraint=rh_constraint,
          tf_limit=5000.0,print_every=10,nav_period=6)




# NOTE(review): show() presumably displays the generator's configuration -- confirm in ic_gen.
env.ic_gen.show()

# --- Policy, value function, agent and training ---

arch = Arch()

# Recurrent (GRU1) policy network mapping obs_dim inputs to logit_dim outputs,
# paired with the Softmax_pd distribution imported as PD.
# The output saved with this notebook reports that shuffle is disabled whenever recurrent steps > 1.
policy = Policy(policy_nets.GRU1(obs_dim, logit_dim, recurrent_steps=recurrent_steps,output_network_scale=5), 
               PD(action_dim, actions_per_dim),
               shuffle=False,
               max_grad_norm=30,
               rollout_limit=3,
               kl_limit=0.5,
               init_func=rl_utils.xn_init)
# Disabled alternative policy configuration, kept for reference:
#policy = Policy(policy_nets.GRU1(obs_dim, logit_dim, recurrent_steps=recurrent_steps), 
#                PD(action_dim, actions_per_dim),
#                shuffle=False,
#                kl_targ=0.001,epochs=20, beta=0.1, servo_kl=True, max_grad_norm=30, scale_vector_obs=True,
 #               init_func=rl_utils.xn_init)
# Recurrent (GRU1) value-function network built with the same obs_dim and recurrent_steps as the policy.
# NOTE(review): batch_size=9999999 presumably means "use the whole rollout as one batch" -- confirm.
value_function = Value_function(valfunc_nets.GRU1(obs_dim, recurrent_steps=recurrent_steps), scale_obs=True,
                                shuffle=False, batch_size=9999999, max_grad_norm=30, 
                                verbose=False)

# NOTE(review): the fourth positional argument is None; check Agent's signature for what it disables.
agent = Agent(arch, policy, value_function, None, env, logger,
              policy_episodes=30, policy_steps=3000, gamma1=0.95, gamma2=0.995, 
              recurrent_steps=recurrent_steps, monitor=env.rl_stats)
# Long-running call: the log saved with this notebook extends past episode 6600.
# NOTE(review): the argument is presumably the number of training episodes -- confirm in agent.
agent.train(120000)

Quaternion_attitude
Euler321 Attitude
C_cb: 
[[ 1.0000000e+00  0.0000000e+00 -0.0000000e+00]
 [ 0.0000000e+00 -1.0000000e+00  1.2246468e-16]
 [ 0.0000000e+00 -1.2246468e-16 -1.0000000e+00]]
[ 0.0000000e+00 -1.2246468e-16 -1.0000000e+00]
using max  pooling
V4: Output State type:  <function Sensor.optflow_state_range_dp1 at 0x1381d4378>
<bound method Sensor.track_func1 of <sensor.Sensor object at 0x149fe5978>>
<bound method Sensor.track_func1 of <sensor.Sensor object at 0x149fe5978>>
6dof dynamics model 
thruster model: 
Inertia Tensor:  [[333.33333333   0.           0.        ]
 [  0.         333.33333333   0.        ]
 [  0.           0.         333.33333333]]
Lander Model: 
Reward_terminal equator
queue fixed
Flat Constraint
Attitude Constraint
Rotational Velocity Constraint
Position Hysterises Constraint


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

lander env RHL
Landing_icgen:
[[-1.]
 [ 1.]]
AWR Policy: 
	xn_init: layer  Linear(in_features=13, out_features=130, bias=True)
	xn_init: layer  GRUCell(130, 124)
	xn_init: layer  Linear(in_features=124, out_features=120, bias=True)
	xn_init: layer  Linear(in_features=120, out_features=24, bias=True)
Policy: recurrent steps > 1, disabling shuffle
	Test Mode:          False
	Shuffle :           False
	Shuffle by Chunks:  False
	Max Grad Norm:      30
	Recurrent Steps:    60
	Rollout Limit:      3
	Advantage Func:     <advantage_utils.Adv_relu object at 0x13d078ef0>
	Advantage Norm:     <function Adv_normalizer.apply at 0x137470378>
	PD:                 <softmax_pd.Softmax_pd object at 0x13d078b00>
Value Funtion
	xn_init: layer  Linear(in_features=13, out_features=130, bias=True)
	xn_init: layer  GRUCell(130, 25)
	xn_init: layer  Linear(in_features=25, out_features=5, bias=True)
	xn_init: layer  Linear(in_features=5, out_features=1, bias=True)
Value Function: recurrent steps > 1, disablin

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1111   0.0718   0.2175   5.5684   2.2087   2.1342
ADVA:  (6359,) (9487,) 0.6702856540529145
ADV1:  0.15606798502079755 0.08577643282881034 0.9153909454311269 1.2611057411457987 -3.3105579533484315
ADVB:  (6449,) (9487,) 0.6797723200168652
ADV2:  0.4836696678985052 0.44279487561792563 0.37827088533341696 1.1793702082475277 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0316   0.0091   0.0460   0.1243   0.0987   0.0180
***** Episode 252, Mean R = -221.7  Std R = 91.9  Min R = -502.1
PolicyLoss: 6.99
Policy_Entropy: 0.000357
Policy_KL: 0.00468
Policy_SD: 0.955
Steps: 3.35e+03
TotalSteps: 2.18e+04
VF_0_ExplainedVarNew: 0.0265
VF_0_ExplainedVarOld: 0.0249
VF_0_Loss : 0.97


*** W VIO TYPE CNT:  [ 85. 162.  54.]
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2507   0.1236   0.4431   5.5684   2.2087   2.1342
ADVA:  (6162,) (9444,) 0.6524777636594663
ADV1:  0.0 -0.019865759877806255 0.9278421498652488 1.2611057411457987 -3.42

***** Episode 473, Mean R = -393.0  Std R = 165.1  Min R = -756.1
PolicyLoss: 5.14
Policy_Entropy: 0.000421
Policy_KL: 0.00357
Policy_SD: 0.955
Steps: 8.4e+03
TotalSteps: 5.76e+04
VF_0_ExplainedVarNew: 0.104
VF_0_ExplainedVarOld: 0.0885
VF_0_Loss : 0.399


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0985   0.0518   0.1872   5.5684   2.2087   2.1342
ADVA:  (15633,) (21118,) 0.740268964864097
ADV1:  0.19827869802396714 0.031249675626480077 0.648683943677561 0.8852575846156354 -3.9286676963484277
ADVB:  (15779,) (21118,) 0.747182498342646
ADV2:  0.3595445328942105 0.3519444733391061 0.2936779667178667 1.305263222659492 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0143   0.0049   0.0231   0.1243   0.0987   0.0180
***** Episode 504, Mean R = -330.7  Std R = 141.0  Min R = -664.1
PolicyLoss: 5.02
Policy_Entropy: 0.000423
Policy_KL: 0.00209
Policy_SD: 0.955
Steps: 6.57e+03
TotalSteps: 6.42e+04
VF_0_ExplainedVarNew: 0.126
VF_0_ExplainedVarOld: 0.112
VF_0_Loss : 0.498


***

ADVA:  (19884,) (32395,) 0.6137984256829757
ADV1:  0.05196064976693149 0.009005452405660834 0.3223903732594188 1.2465702204383105 -3.930750572311836
ADVB:  (20878,) (32395,) 0.6444821731748727
ADV2:  0.21882432537510368 0.3171625522971911 0.3557902064323888 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0170   0.0063   0.0303   0.1243   0.0987   0.0180
***** Episode 721, Mean R = -471.6  Std R = 173.9  Min R = -877.6
PolicyLoss: 5.22
Policy_Entropy: 0.00046
Policy_KL: 0.00349
Policy_SD: 0.955
Steps: 1.12e+04
TotalSteps: 1.29e+05
VF_0_ExplainedVarNew: 0.356
VF_0_ExplainedVarOld: 0.345
VF_0_Loss : 0.122


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0270   0.0095   0.0456   5.5684   2.2087   2.1342
ADVA:  (20312,) (33148,) 0.61276698443345
ADV1:  0.04958943204990607 0.007628774287753819 0.33031740817480604 1.5611997017333683 -4.020778884686281
ADVB:  (21217,) (33148,) 0.6400687824303125
ADV2:  0.19858160224068228 0.30617794243014723 0.35749802458148355 3.0 0.0
Polic

attitude |    0.01    0.01   -0.00 |    1.78    0.68    1.82 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00    0.00 |    0.03    0.03    0.03 |   -0.10   -0.10   -0.10 |    0.10    0.10    0.10
a_f      |   -0.03   -0.16 |    0.70    1.90 |   -1.56   -3.14 |    1.54    3.11
w_f      |    0.00    0.00    0.00 |    0.03    0.03    0.03 |   -0.10   -0.10   -0.10 |    0.10    0.10    0.10
w_rewards |   -7.40 |    6.50 |  -42.65 |   -0.04
w_penalty |   -8.39 |   27.72 | -100.00 |    0.00
fov_penalty |  -17.90 |   23.97 |  -50.00 |    0.00
theta_cv |    1.54 |    0.69 |    0.00 |    3.13
seeker_angles |   -0.02    0.01 |    0.33    0.34 |   -1.00   -1.00 |    1.00    1.00
cs_angles | -0.0154  0.0056 |  0.3296  0.3415 | -0.9994 -0.9999 |  0.9999  0.9998
optical_flow | -0.0002 -0.0000 |  0.0020  0.0021 | -0.0316 -0.0186 |  0.0160  0.0359
v_err    | -0.4532 |  0.1691 | -1.0857 |  0.7663
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |  874

ADVA:  (14235,) (28897,) 0.49261168979478837
ADV1:  0.03331384444067853 0.03571893412493058 0.21028059880121078 2.026718912124634 -1.4434280713399272
ADVB:  (19155,) (28897,) 0.6628715783645361
ADV2:  0.2581486595669624 0.43859541581789296 0.5580940178127125 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0642   0.0259   0.1365   0.1365   0.0987   0.0259
Update Cnt = 40    ET =   1338.2   Stats:  Mean, Std, Min, Max
r_f      |   21.00  -10.38    6.63 |  502.35  489.35  567.67 |-1387.58-1088.21-1316.04 | 1305.59 1225.42 1364.30
v_f      |   -0.01   -0.04    0.00 |    0.40    0.40    0.40 |   -0.94   -1.11   -1.03 |    0.93    1.05    1.15
r_i      |    3.77   32.52   13.50 |  674.46  668.07  763.60 |-1258.98-1319.07-1264.35 | 1199.71 1323.28 1292.38
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |  640.68 |  282.47 |   20.90 | 1225.52
norm_vf  |    0.66 |    0.22 |    0.16 |    1.29
gs_f     |    1.0

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0572   0.0239   0.1004   5.5684   2.2087   2.1342
ADVA:  (11534,) (23671,) 0.48726289552617125
ADV1:  0.0022673073622179185 0.0044290641417395376 0.17361643851833353 2.0696213817596436 -1.0777354860305808
ADVB:  (14552,) (23671,) 0.6147606776224072
ADV2:  0.18684796128485853 0.3902465893377797 0.5519417482915577 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0722   0.0281   0.1452   0.1864   0.0987   0.0373
***** Episode 1558, Mean R = -201.3  Std R = 35.5  Min R = -331.5
PolicyLoss: 6.33
Policy_Entropy: 0.00107
Policy_KL: 0.00781
Policy_SD: 0.953
Steps: 7.78e+03
TotalSteps: 4.14e+05
VF_0_ExplainedVarNew: 0.934
VF_0_ExplainedVarOld: 0.892
VF_0_Loss : 0.0935


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0554   0.0261   0.1034   5.5684   2.2087   2.1342
ADVA:  (11434,) (23557,) 0.4853758967610477
ADV1:  0.0 0.0013476743341213225 0.16686565981841556 2.0696213817596436 -1.0156794100646453
ADVB:  (13792,) (23557,) 0

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0445   0.0169   0.0876   5.5684   2.2087   2.1342
ADVA:  (12864,) (22541,) 0.5706934031320704
ADV1:  0.0 -0.0146917270018391 0.16347723072217038 1.9744027591578055 -1.330025372941257
ADVB:  (10821,) (22541,) 0.4800585599574109
ADV2:  0.0 0.2336674461220325 0.498339088601667 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0758   0.0281   0.1453   0.1864   0.0987   0.0373
***** Episode 1806, Mean R = -169.8  Std R = 29.9  Min R = -254.1
PolicyLoss: 4.73
Policy_Entropy: 0.00145
Policy_KL: 0.0129
Policy_SD: 0.954
Steps: 7.31e+03
TotalSteps: 4.75e+05
VF_0_ExplainedVarNew: 0.917
VF_0_ExplainedVarOld: 0.889
VF_0_Loss : 0.119


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0315   0.0123   0.0587   5.5684   2.2087   2.1342
ADVA:  (13545,) (22277,) 0.6080262153790905
ADV1:  0.0 -0.0048148744835844895 0.15120074249248783 1.9633522324801478 -1.090809168815615
ADVB:  (10405,) (22277,) 0.4670736634196705
ADV2:  0.0 0.2415699729

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0211   0.0085   0.0413   5.5684   2.2087   2.1342
ADVA:  (11643,) (23348,) 0.49867226314887786
ADV1:  0.0 0.0015885188359827098 0.1495499692330295 2.0831616633278824 -1.2056449077643019
ADVB:  (12402,) (23348,) 0.5311804008908686
ADV2:  0.03626758295498661 0.2791229522771769 0.5377462695209333 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1073   0.0384   0.2209   0.2604   0.1447   0.0556
***** Episode 2054, Mean R = -161.7  Std R = 21.8  Min R = -216.9
PolicyLoss: 4.98
Policy_Entropy: 0.00201
Policy_KL: 0.0141
Policy_SD: 0.953
Steps: 7.96e+03
TotalSteps: 5.35e+05
VF_0_ExplainedVarNew: 0.957
VF_0_ExplainedVarOld: 0.943
VF_0_Loss : 0.121


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0232   0.0094   0.0516   5.5684   2.2087   2.1342
ADVA:  (12613,) (23511,) 0.5364722895665859
ADV1:  0.0 -0.0015761393596750426 0.1271396130765824 1.6836611302693685 -1.2056449077643019
ADVB:  (12527,) (23511,) 0.5328144272893539
ADV

Dynamics: Max Disturbance (m/s^2):  [0.00127453 0.00129553 0.00128748] 0.0022272011180689555
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0201   0.0064   0.0349   5.5684   2.2087   2.1342
ADVA:  (12488,) (23957,) 0.5212672705263597
ADV1:  0.0 -0.0014006063714056122 0.1362038501287322 2.2497256978352866 -1.3295268138249736
ADVB:  (13760,) (23957,) 0.5743623992987436
ADV2:  0.08860749790366597 0.2856292237206803 0.502887527779971 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1061   0.0361   0.1809   0.3220   0.1549   0.0601
***** Episode 2302, Mean R = -154.8  Std R = 26.2  Min R = -221.5
PolicyLoss: 4.61
Policy_Entropy: 0.00271
Policy_KL: 0.0132
Policy_SD: 0.951
Steps: 8.23e+03
TotalSteps: 5.99e+05
VF_0_ExplainedVarNew: 0.932
VF_0_ExplainedVarOld: 0.92
VF_0_Loss : 0.134


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0215   0.0058   0.0374   5.5684   2.2087   2.1342
ADVA:  (13059,) (24858,) 0.5253439536567704
ADV1:  0.0037189241018062424 0.00230516160086989

optical_flow |  0.0006 -0.0003 |  0.0066  0.0088 | -0.1380 -0.0968 |  0.0759  0.1729
v_err    | -0.0383 |  0.1045 | -0.4765 |  0.7850
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |  243.65 |  138.52 |    8.85 |  624.86
tracking_rewards |  -90.72 |   23.28 | -178.28 |  -53.66
steps    |     268 |      53 |     154 |     383
***** Episode 2550, Mean R = -149.3  Std R = 21.2  Min R = -198.2
PolicyLoss: 4.13
Policy_Entropy: 0.00347
Policy_KL: 0.012
Policy_SD: 0.948
Steps: 8.5e+03
TotalSteps: 6.66e+05
VF_0_ExplainedVarNew: 0.914
VF_0_ExplainedVarOld: 0.896
VF_0_Loss : 0.095


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0459   0.0176   0.0828   5.5684   2.2087   2.1342
ADVA:  (15252,) (25425,) 0.599882005899705
ADV1:  0.0 -0.00433106915191417 0.14606371648623112 2.3525973542531333 -1.1091500607310318
ADVB:  (10539,) (25425,) 0.4145132743362832
ADV2:  0.0 0.19936422963455214 0.5021044258141089 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0916

w        |   -0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.05 |    0.06    0.06    0.05
a_f      |    0.02    0.13 |    0.70    1.81 |   -1.45   -3.12 |    1.51    3.11
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.04 |    0.03    0.04    0.04
w_rewards |   -0.00 |    0.02 |   -0.22 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |  -48.71 |    7.93 |  -50.00 |    0.00
theta_cv |    0.58 |    0.30 |    0.00 |    2.21
seeker_angles |    0.03   -0.06 |    0.32    0.32 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0254 -0.0551 |  0.3205  0.3207 | -1.0000 -1.0000 |  0.9998  0.9999
optical_flow |  0.0004  0.0001 |  0.0077  0.0097 | -0.0938 -0.2564 |  0.2284  0.2732
v_err    | -0.0266 |  0.0952 | -0.4598 |  0.6139
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |  199.30 |  124.24 |    8.17 |  521.48
tracking_rewards |  -83.46 |   17.08 | -141.54 |  -49.35
steps    |     280 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1119   0.0437   0.2065   0.4432   0.2259   0.1087
Update Cnt = 100    ET =   1100.8   Stats:  Mean, Std, Min, Max
r_f      |    2.58   18.83   21.31 |  254.80  246.02  298.07 | -574.00 -646.29 -699.76 |  578.59  709.17  843.26
v_f      |   -0.02    0.01   -0.00 |    0.39    0.41    0.38 |   -1.28   -1.22   -0.86 |    0.94    1.04    1.03
r_i      |   60.06   26.37   66.42 |  664.80  688.16  755.07 |-1322.19-1273.30-1355.86 | 1309.08 1318.81 1296.72
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.09    0.10    0.09
norm_rf  |  174.94 |  109.30 |   20.68 |  564.49
norm_vf  |    0.65 |    0.22 |    0.18 |    1.50
gs_f     |    0.92 |    1.07 |    0.04 |    8.22
thrust   |   -0.00   -0.00    0.00 |    0.96    0.95    0.95 |   -3.44   -3.40   -3.41 |    3.39    3.39    3.45
norm_thrust |    1.52 |    0.65 |    0.00 |    3.46
fuel     |    3.98 |    0.69 |    2.13 |    5.30
rewards  | -141.07 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1657   0.0630   0.3272   0.4748   0.2540   0.1159
***** Episode 3418, Mean R = -132.0  Std R = 21.9  Min R = -178.3
PolicyLoss: 3.45
Policy_Entropy: 0.0065
Policy_KL: 0.0119
Policy_SD: 0.943
Steps: 8.81e+03
TotalSteps: 9.15e+05
VF_0_ExplainedVarNew: 0.813
VF_0_ExplainedVarOld: 0.752
VF_0_Loss : 0.131


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0365   0.0206   0.0878   5.5684   2.2087   2.1342
ADVA:  (15886,) (26338,) 0.6031589338598223
ADV1:  0.00301564019261244 0.015614393506892414 0.1851084398071043 2.280357821662444 -1.2600205850601216
ADVB:  (11862,) (26338,) 0.4503758827549548
ADV2:  0.0 0.20583494769000318 0.5389688916281034 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2295   0.1015   0.4158   0.4748   0.2540   0.1159
***** Episode 3449, Mean R = -134.8  Std R = 21.6  Min R = -186.2
PolicyLoss: 3.73
Policy_Entropy: 0.00678
Policy_KL: 0.0109
Policy_SD: 0.944
Steps: 8.66e+03
TotalSteps: 9.23e+05
VF_0_Ex

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1818   0.0647   0.3287   0.4748   0.2540   0.1159
***** Episode 3666, Mean R = -131.0  Std R = 17.6  Min R = -176.5
PolicyLoss: 3.93
Policy_Entropy: 0.00743
Policy_KL: 0.0119
Policy_SD: 0.943
Steps: 9.06e+03
TotalSteps: 9.88e+05
VF_0_ExplainedVarNew: 0.898
VF_0_ExplainedVarOld: 0.873
VF_0_Loss : 0.128


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0275   0.0109   0.0450   5.5684   2.2087   2.1342
ADVA:  (16508,) (27812,) 0.5935567380986625
ADV1:  0.0029896096822486645 0.011859424839425377 0.16669815794679788 2.274706857736914 -1.4024616511662824
ADVB:  (14405,) (27812,) 0.5179418955846398
ADV2:  0.013564929504523926 0.24781778369691496 0.5401573005985721 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1665   0.0603   0.2938   0.4748   0.2540   0.1159
***** Episode 3697, Mean R = -136.4  Std R = 21.6  Min R = -181.7
PolicyLoss: 3.9
Policy_Entropy: 0.00755
Policy_KL: 0.012
Policy_SD: 0.944
Steps: 9.39e+03
TotalStep

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1036   0.0323   0.1909   0.6390   0.3480   0.1470
***** Episode 3914, Mean R = -132.1  Std R = 21.3  Min R = -182.4
PolicyLoss: 3.35
Policy_Entropy: 0.00818
Policy_KL: 0.0103
Policy_SD: 0.942
Steps: 9.66e+03
TotalSteps: 1.06e+06
VF_0_ExplainedVarNew: 0.851
VF_0_ExplainedVarOld: 0.822
VF_0_Loss : 0.0992


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0381   0.0185   0.0833   5.5684   2.2087   2.1342
ADVA:  (12268,) (28065,) 0.43712809549260645
ADV1:  0.0 -0.014718047416229863 0.1644499107770374 2.1497632249196372 -1.7438932085037253
ADVB:  (15472,) (28065,) 0.5512916443969357
ADV2:  0.04236586846787979 0.22446974253306728 0.4609047995238926 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1562   0.0635   0.2836   0.6390   0.3480   0.1470
***** Episode 3945, Mean R = -131.6  Std R = 20.6  Min R = -184.4
PolicyLoss: 3.29
Policy_Entropy: 0.00833
Policy_KL: 0.0121
Policy_SD: 0.942
Steps: 9.04e+03
TotalSteps: 1.07e+06
VF

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0835   0.5530   2.3241   2.3241   1.0835   0.5530
***** Episode 4162, Mean R = -134.6  Std R = 25.7  Min R = -189.9
PolicyLoss: 3.82
Policy_Entropy: 0.00947
Policy_KL: 0.012
Policy_SD: 0.938
Steps: 9.81e+03
TotalSteps: 1.14e+06
VF_0_ExplainedVarNew: 0.814
VF_0_ExplainedVarOld: 0.746
VF_0_Loss : 0.121


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0431   0.0182   0.0764   5.5684   2.2087   2.1342
ADVA:  (16768,) (28990,) 0.5784063470162125
ADV1:  0.007117620157299359 0.025027913408694855 0.18595955276545445 2.3171389317191116 -1.3303800535202046
ADVB:  (15072,) (28990,) 0.5199034149706795
ADV2:  0.014565812169177579 0.255500777751245 0.5499682984638629 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4150   0.2336   0.9650   2.3241   1.0835   0.5530
***** Episode 4193, Mean R = -134.2  Std R = 23.7  Min R = -168.2
PolicyLoss: 3.82
Policy_Entropy: 0.00969
Policy_KL: 0.0116
Policy_SD: 0.938
Steps: 9.82e+03
TotalSteps

optical_flow |  0.0001  0.0005 |  0.0126  0.0121 | -0.2919 -0.1912 |  0.5780  0.3827
v_err    | -0.0210 |  0.0824 | -0.5258 |  0.5917
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |  125.35 |   95.46 |    3.74 |  525.94
tracking_rewards |  -77.45 |   17.05 | -129.86 |  -44.21
steps    |     315 |      39 |     154 |     385
***** Episode 4410, Mean R = -131.7  Std R = 21.4  Min R = -173.1
PolicyLoss: 3
Policy_Entropy: 0.0105
Policy_KL: 0.00916
Policy_SD: 0.935
Steps: 1.01e+04
TotalSteps: 1.22e+06
VF_0_ExplainedVarNew: 0.875
VF_0_ExplainedVarOld: 0.842
VF_0_Loss : 0.102


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0445   0.0156   0.0739   5.5684   2.2087   2.1342
ADVA:  (15205,) (28685,) 0.530067979780373
ADV1:  0.0056635426414612805 0.008649767298374505 0.18975816608254562 2.2836374918619793 -1.57638612508774
ADVB:  (17026,) (28685,) 0.5935506362210214
ADV2:  0.06418626678649529 0.2274515589955001 0.487770097486095 3.0 0.0
Policy  Gradients: u/sd/Max/

w        |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.05   -0.05 |    0.06    0.06    0.05
a_f      |   -0.02   -0.16 |    0.65    1.87 |   -1.42   -3.13 |    1.42    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.05   -0.03 |    0.06    0.06    0.03
w_rewards |   -0.00 |    0.01 |   -0.19 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |  -45.16 |   14.78 |  -50.00 |    0.00
theta_cv |    0.63 |    0.33 |    0.00 |    2.39
seeker_angles |    0.02   -0.01 |    0.30    0.30 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0223 -0.0142 |  0.3016  0.3008 | -0.9999 -1.0000 |  0.9997  0.9995
optical_flow |  0.0004  0.0003 |  0.0131  0.0129 | -0.3396 -0.5334 |  0.2719  0.6733
v_err    | -0.0218 |  0.0816 | -0.4886 |  0.5512
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |  119.97 |   99.46 |    3.06 |  489.75
tracking_rewards |  -77.56 |   17.74 | -130.34 |  -41.32
steps    |     318 |  

attitude |   -0.06   -0.02   -0.02 |    1.23    0.69    1.87 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.02   -0.04 |    0.70    1.89 |   -1.45   -3.14 |    1.54    3.13
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.05   -0.03 |    0.03    0.03    0.04
w_rewards |   -0.00 |    0.00 |   -0.04 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |  -45.16 |   14.78 |  -50.00 |    0.00
theta_cv |    0.62 |    0.33 |    0.00 |    2.50
seeker_angles |    0.02   -0.01 |    0.29    0.28 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0174 -0.0072 |  0.2936  0.2834 | -0.9997 -0.9997 |  0.9997  0.9998
optical_flow |  0.0004  0.0003 |  0.0145  0.0138 | -0.7457 -0.2298 |  0.3929  0.7525
v_err    | -0.0191 |  0.0824 | -0.4842 |  0.8755
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |  101

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1323   0.0476   0.2402   4.3565   1.8283   1.0935
Update Cnt = 170    ET =   1534.2   Stats:  Mean, Std, Min, Max
r_f      |   34.66   15.14   13.32 |  232.01  206.70  237.48 | -658.82 -691.26 -744.96 |  700.84  609.12  669.94
v_f      |   -0.02   -0.01    0.00 |    0.36    0.38    0.36 |   -0.91   -1.27   -1.15 |    1.08    1.34    1.17
r_i      |   95.21   22.76    7.25 |  715.38  657.10  750.82 |-1358.29-1371.29-1378.54 | 1345.23 1350.70 1332.81
v_i      |   -0.01   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.10    0.09    0.10
norm_rf  |   88.78 |   91.41 |    2.92 |  514.41
norm_vf  |    0.58 |    0.27 |    0.10 |    1.49
gs_f     |    1.05 |    1.79 |    0.02 |   22.56
thrust   |   -0.01   -0.01   -0.00 |    0.97    0.97    0.97 |   -3.44   -3.31   -3.25 |    3.41    3.45    3.44
norm_thrust |    1.54 |    0.67 |    0.00 |    3.46
fuel     |    3.99 |    0.50 |    1.86 |    4.99
rewards  | -132.65 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7133   0.3263   1.7795   4.3565   1.8283   1.0935
***** Episode 5588, Mean R = -139.3  Std R = 33.7  Min R = -249.6
PolicyLoss: 4.02
Policy_Entropy: 0.017
Policy_KL: 0.0231
Policy_SD: 0.915
Steps: 1.04e+04
TotalSteps: 1.6e+06
VF_0_ExplainedVarNew: 0.854
VF_0_ExplainedVarOld: 0.802
VF_0_Loss : 0.0921


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0313   0.0126   0.0642   5.5684   2.2087   2.1342
ADVA:  (17052,) (30584,) 0.5575464295056238
ADV1:  0.0 0.011202554129931347 0.16753313746812568 2.0479579397571825 -1.680417807896934
ADVB:  (13072,) (30584,) 0.42741302641904266
ADV2:  0.0 0.23735695573493637 0.5672057004161799 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7389   0.3489   1.3676   4.3565   1.8283   1.0935
***** Episode 5619, Mean R = -135.2  Std R = 19.5  Min R = -180.4
PolicyLoss: 3.83
Policy_Entropy: 0.0174
Policy_KL: 0.0225
Policy_SD: 0.915
Steps: 1.01e+04
TotalSteps: 1.61e+06
VF_0_ExplainedVarNew: 0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3480   0.1282   0.6064   4.3565   1.8283   1.0935
***** Episode 5836, Mean R = -122.0  Std R = 26.5  Min R = -173.4
PolicyLoss: 2.78
Policy_Entropy: 0.0182
Policy_KL: 0.0116
Policy_SD: 0.915
Steps: 1.05e+04
TotalSteps: 1.68e+06
VF_0_ExplainedVarNew: 0.711
VF_0_ExplainedVarOld: 0.616
VF_0_Loss : 0.104


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0584   0.0296   0.1235   5.5684   2.2087   2.1342
ADVA:  (16254,) (30447,) 0.5338456990836535
ADV1:  0.0 -0.00545115122767402 0.210157315278185 2.359347937901815 -1.881392779350283
ADVB:  (13855,) (30447,) 0.45505304299274146
ADV2:  0.0 0.17451562406737364 0.48166770062159164 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2078   0.0771   0.4166   4.3565   1.8283   1.0935
***** Episode 5867, Mean R = -126.3  Std R = 21.1  Min R = -156.6
PolicyLoss: 2.64
Policy_Entropy: 0.0186
Policy_KL: 0.00865
Policy_SD: 0.914
Steps: 9.79e+03
TotalSteps: 1.69e+06
VF_0_ExplainedVarNew: 0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8996   0.4654   1.9659   5.3009   1.9960   1.5137
***** Episode 6084, Mean R = -126.6  Std R = 28.9  Min R = -168.4
PolicyLoss: 2.75
Policy_Entropy: 0.0208
Policy_KL: 0.0152
Policy_SD: 0.909
Steps: 1.07e+04
TotalSteps: 1.76e+06
VF_0_ExplainedVarNew: 0.834
VF_0_ExplainedVarOld: 0.801
VF_0_Loss : 0.112


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0333   0.0165   0.0821   5.5684   2.2087   2.1342
ADVA:  (23171,) (31718,) 0.730531559366921
ADV1:  0.0 -0.006959004954572275 0.1877611708755972 2.2520026842753094 -2.0525300816694916
ADVB:  (7237,) (31718,) 0.2281669714357778
ADV2:  0.0 0.129717113539071 0.4641715060291593 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.1938   2.0924   8.1404   8.1404   3.1938   2.0924
***** Episode 6115, Mean R = -122.2  Std R = 31.3  Min R = -170.1
PolicyLoss: 3.7
Policy_Entropy: 0.0207
Policy_KL: 0.0165
Policy_SD: 0.91
Steps: 1.07e+04
TotalSteps: 1.77e+06
VF_0_ExplainedVarNew: 0.771


Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3999   0.1788   0.8716   8.1404   3.1938   2.0924
***** Episode 6332, Mean R = -124.5  Std R = 26.9  Min R = -168.4
PolicyLoss: 2.59
Policy_Entropy: 0.0228
Policy_KL: 0.00732
Policy_SD: 0.911
Steps: 1.06e+04
TotalSteps: 1.85e+06
VF_0_ExplainedVarNew: 0.803
VF_0_ExplainedVarOld: 0.763
VF_0_Loss : 0.118


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0263   0.0086   0.0407   5.5684   2.2087   2.1342
ADVA:  (14807,) (31451,) 0.47079584114972495
ADV1:  0.0 0.009084720055575675 0.19626239678710805 2.3096483325958252 -1.711100222269696
ADVB:  (16922,) (31451,) 0.5380433054592859
ADV2:  0.02580493071476258 0.23113304222131675 0.5253236969460041 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4314   0.1584   0.7153   8.1404   3.1938   2.0924
***** Episode 6363, Mean R = -130.2  Std R = 27.7  Min R = -170.9
PolicyLoss: 2.85
Policy_Entropy: 0.0231
Policy_KL: 0.0085
Policy_SD: 0.912
Steps: 1.03e+04
TotalSteps: 1.86e+06
VF_0_

***** Episode 6580, Mean R = -130.2  Std R = 24.4  Min R = -167.6
PolicyLoss: 2.35
Policy_Entropy: 0.0229
Policy_KL: 0.0112
Policy_SD: 0.911
Steps: 1.06e+04
TotalSteps: 1.93e+06
VF_0_ExplainedVarNew: 0.82
VF_0_ExplainedVarOld: 0.789
VF_0_Loss : 0.124


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0190   0.0071   0.0342   5.5684   2.2087   2.1342
ADVA:  (17910,) (31982,) 0.5600025014070414
ADV1:  0.0 -0.0019694202255299688 0.18735971744387525 2.342044588724772 -1.233807974314846
ADVB:  (14348,) (31982,) 0.44862735288599837
ADV2:  0.0 0.16307044813853092 0.47840192680509824 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2262   0.0974   0.4101   8.1404   3.1938   2.0924
***** Episode 6611, Mean R = -113.9  Std R = 32.5  Min R = -190.8
PolicyLoss: 2.37
Policy_Entropy: 0.0232
Policy_KL: 0.0137
Policy_SD: 0.916
Steps: 1.06e+04
TotalSteps: 1.94e+06
VF_0_ExplainedVarNew: 0.689
VF_0_ExplainedVarOld: 0.655
VF_0_Loss : 0.133


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :

attitude |    0.02   -0.01   -0.20 |    1.20    0.62    1.89 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |   -0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.05 |    0.05    0.06    0.06
a_f      |   -0.03   -0.16 |    0.65    1.89 |   -1.50   -3.14 |    1.46    3.13
w_f      |    0.00   -0.00    0.00 |    0.01    0.02    0.01 |   -0.05   -0.06   -0.03 |    0.04    0.06    0.03
w_rewards |   -0.00 |    0.02 |   -0.28 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |  -35.97 |   22.47 |  -50.00 |    0.00
theta_cv |    0.59 |    0.32 |    0.00 |    2.79
seeker_angles |   -0.00   -0.01 |    0.26    0.27 |   -1.00   -1.00 |    1.00    1.00
cs_angles | -0.0020 -0.0126 |  0.2613  0.2740 | -0.9996 -0.9994 |  0.9992  0.9996
optical_flow |  0.0005  0.0006 |  0.0159  0.0191 | -0.4838 -0.5962 |  0.3816  0.6667
v_err    | -0.0188 |  0.0782 | -0.5092 |  0.4914
landing_rewards |    0.03 |    0.57 |    0.00 |   10.00
landing_margin |   57

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1845   0.0690   0.3755  12.0766   4.5382   3.0443
Update Cnt = 230    ET =   1221.0   Stats:  Mean, Std, Min, Max
r_f      |   10.50  -11.07   -3.72 |  197.55  182.98  218.78 | -384.37 -462.40 -650.78 |  456.46  413.10  438.68
v_f      |    0.01    0.03    0.02 |    0.28    0.30    0.28 |   -0.93   -1.00   -0.93 |    0.91    0.87    0.93
r_i      |   10.21  -37.54  -17.81 |  669.61  642.11  788.13 |-1227.53-1204.37-1369.98 | 1249.07 1341.14 1339.06
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.08    0.10
norm_rf  |   43.70 |   55.13 |    0.42 |  378.68
norm_vf  |    0.43 |    0.24 |    0.07 |    1.21
gs_f     |    0.89 |    1.01 |    0.02 |    7.70
thrust   |    0.01    0.01   -0.00 |    0.95    0.95    0.94 |   -3.37   -3.17   -3.45 |    3.46    3.46    3.40
norm_thrust |    1.49 |    0.68 |    0.00 |    3.46
fuel     |    3.97 |    0.35 |    2.28 |    4.96
rewards  | -118.38 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3144   0.1501   0.6136  12.0766   4.5382   3.0443
***** Episode 7448, Mean R = -106.3  Std R = 29.8  Min R = -156.2
PolicyLoss: 2.3
Policy_Entropy: 0.0275
Policy_KL: 0.00725
Policy_SD: 0.902
Steps: 1.1e+04
TotalSteps: 2.23e+06
VF_0_ExplainedVarNew: 0.661
VF_0_ExplainedVarOld: 0.557
VF_0_Loss : 0.0944


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0334   0.0112   0.0542   5.5684   2.2087   2.1342
ADVA:  (18241,) (32875,) 0.5548593155893536
ADV1:  0.0 -0.007493820950713801 0.1961443374414649 2.172541718898564 -2.078772039413454
ADVB:  (14890,) (32875,) 0.4529277566539924
ADV2:  0.0 0.17647579401272587 0.48957764223386296 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5957   0.2634   1.0924  12.0766   4.5382   3.0443
***** Episode 7479, Mean R = -112.4  Std R = 35.2  Min R = -174.8
PolicyLoss: 2.45
Policy_Entropy: 0.0275
Policy_KL: 0.00961
Policy_SD: 0.903
Steps: 1.08e+04
TotalSteps: 2.24e+06
VF_0_ExplainedVarNew: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4717   0.1884   0.8956  12.0766   4.5382   3.0443
***** Episode 7696, Mean R = -110.0  Std R = 39.0  Min R = -169.0
PolicyLoss: 2.83
Policy_Entropy: 0.0273
Policy_KL: 0.00724
Policy_SD: 0.903
Steps: 1.08e+04
TotalSteps: 2.32e+06
VF_0_ExplainedVarNew: 0.857
VF_0_ExplainedVarOld: 0.806
VF_0_Loss : 0.12


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0559   0.0236   0.1015   5.5684   2.2087   2.1342
ADVA:  (17286,) (32430,) 0.5330249768732654
ADV1:  0.00990150957631726 0.018866624592909755 0.180650828691153 2.3531567732493084 -1.8481058976479003
ADVB:  (20663,) (32430,) 0.6371569534381746
ADV2:  0.0951340825211307 0.27146350560358096 0.5349340732775347 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2255   0.0754   0.4266  12.0766   4.5382   3.0443
***** Episode 7727, Mean R = -112.6  Std R = 36.2  Min R = -170.4
PolicyLoss: 2.75
Policy_Entropy: 0.0272
Policy_KL: 0.00673
Policy_SD: 0.902
Steps: 1.11e+04
TotalSteps: 2

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3713   0.1513   0.7455  23.0599   8.5277   7.0176
***** Episode 7944, Mean R = -113.1  Std R = 35.7  Min R = -182.3
PolicyLoss: 2.28
Policy_Entropy: 0.0279
Policy_KL: 0.0095
Policy_SD: 0.907
Steps: 1.07e+04
TotalSteps: 2.41e+06
VF_0_ExplainedVarNew: 0.789
VF_0_ExplainedVarOld: 0.723
VF_0_Loss : 0.127


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0617   0.0309   0.1100   5.5684   2.2087   2.1342
ADVA:  (17821,) (32483,) 0.5486254348428409
ADV1:  0.005965175704555403 0.0032497155609738527 0.18856049179493467 2.031293751780066 -2.0413579053890087
ADVB:  (20206,) (32483,) 0.6220484561155065
ADV2:  0.07184583148756418 0.24929643082621133 0.5224062606075229 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2840   0.1184   0.5713  23.0599   8.5277   7.0176
***** Episode 7975, Mean R = -98.8  Std R = 41.0  Min R = -196.8
PolicyLoss: 2.54
Policy_Entropy: 0.0282
Policy_KL: 0.00689
Policy_SD: 0.906
Steps: 1.1e+04
TotalSteps:

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1511   0.0470   0.2712  23.0599   8.5277   7.0176
***** Episode 8192, Mean R = -100.3  Std R = 42.3  Min R = -180.1
PolicyLoss: 2.23
Policy_Entropy: 0.0282
Policy_KL: 0.0063
Policy_SD: 0.902
Steps: 1.13e+04
TotalSteps: 2.49e+06
VF_0_ExplainedVarNew: 0.835
VF_0_ExplainedVarOld: 0.805
VF_0_Loss : 0.0937


Dynamics: Max Disturbance (m/s^2):  [0.00127453 0.00129553 0.00142314] 0.0023082815838242885
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0137   0.0058   0.0255   5.5684   2.2087   2.1342
ADVA:  (21044,) (33206,) 0.6337408902005661
ADV1:  0.007349242196230461 0.008185052785396402 0.159808004601917 2.118793544769287 -2.2238529564067635
ADVB:  (16023,) (33206,) 0.482533277118593
ADV2:  0.0 0.19520826074090608 0.4755052848234312 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1464   0.0457   0.2795  23.0599   8.5277   7.0176
***** Episode 8223, Mean R = -93.9  Std R = 36.0  Min R = -163.4
PolicyLoss: 2.56
Policy_Entr

***** Episode 8440, Mean R = -97.4  Std R = 32.7  Min R = -148.1
PolicyLoss: 2.15
Policy_Entropy: 0.0287
Policy_KL: 0.00438
Policy_SD: 0.9
Steps: 1.14e+04
TotalSteps: 2.58e+06
VF_0_ExplainedVarNew: 0.785
VF_0_ExplainedVarOld: 0.734
VF_0_Loss : 0.0799


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0654   0.0236   0.1156   5.5684   2.2087   2.1342
ADVA:  (18668,) (33660,) 0.5546048722519311
ADV1:  0.004492637006043743 0.017163086243210387 0.1723667374427095 2.485069694328547 -2.372827487587931
ADVB:  (17492,) (33660,) 0.5196672608437314
ADV2:  0.010732948328561654 0.21064188275375795 0.5196739268750064 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5522   0.2100   0.9159  23.0599   8.5277   7.0176
***** Episode 8471, Mean R = -84.0  Std R = 34.8  Min R = -193.0
PolicyLoss: 2.55
Policy_Entropy: 0.0287
Policy_KL: 0.00537
Policy_SD: 0.899
Steps: 1.13e+04
TotalSteps: 2.59e+06
VF_0_ExplainedVarNew: 0.552
VF_0_ExplainedVarOld: 0.243
VF_0_Loss : 0.066


Dynamics: Max Distu

attitude |    0.06    0.02   -0.16 |    1.15    0.66    1.83 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.06   -0.07   -0.05 |    0.06    0.06    0.06
a_f      |    0.01   -0.17 |    0.66    1.82 |   -1.37   -3.14 |    1.45    3.14
w_f      |    0.00   -0.00   -0.00 |    0.02    0.03    0.01 |   -0.06   -0.07   -0.04 |    0.05    0.06    0.06
w_rewards |   -0.02 |    0.06 |   -0.48 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |  -20.81 |   24.65 |  -50.00 |    0.00
theta_cv |    0.53 |    0.30 |    0.00 |    3.07
seeker_angles |    0.00    0.00 |    0.22    0.25 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0022  0.0039 |  0.2217  0.2498 | -0.9957 -0.9994 |  0.9999  0.9996
optical_flow |  0.0011  0.0013 |  0.0193  0.0247 | -0.6557 -1.1187 |  1.2665  0.8210
v_err    | -0.0160 |  0.0760 | -0.5120 |  0.5606
landing_rewards |    0.23 |    1.49 |    0.00 |   10.00
landing_margin |   18

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2346   0.0936   0.3983  23.0599   8.5277   7.0176
Update Cnt = 290    ET =   1348.6   Stats:  Mean, Std, Min, Max
r_f      |  -12.00   19.35    4.94 |  195.38  175.78  212.74 | -478.56 -504.25 -414.72 |  380.94  384.21  639.52
v_f      |    0.01   -0.01   -0.00 |    0.20    0.22    0.20 |   -0.69   -0.79   -0.87 |    0.83    0.90    0.75
r_i      |  -12.33   57.42   20.48 |  710.46  646.31  759.47 |-1350.04-1299.09-1250.63 | 1327.07 1351.47 1340.78
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.10    0.10
norm_rf  |   18.46 |   38.78 |    0.27 |  476.21
norm_vf  |    0.31 |    0.19 |    0.04 |    0.96
gs_f     |    1.12 |    1.83 |    0.03 |   18.15
thrust   |    0.01    0.01   -0.00 |    0.91    0.91    0.90 |   -3.36   -3.38   -3.46 |    3.46    3.40    3.36
norm_thrust |    1.43 |    0.67 |    0.00 |    3.46
fuel     |    3.98 |    0.33 |    2.31 |    4.80
rewards  |  -96.10 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1876   0.0856   0.4122  23.0599   8.5277   7.0176
***** Episode 9308, Mean R = -84.0  Std R = 31.9  Min R = -162.7
PolicyLoss: 2.29
Policy_Entropy: 0.0306
Policy_KL: 0.00429
Policy_SD: 0.889
Steps: 1.13e+04
TotalSteps: 2.9e+06
VF_0_ExplainedVarNew: 0.662
VF_0_ExplainedVarOld: 0.518
VF_0_Loss : 0.0405


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0356   0.0129   0.0541   5.5684   2.2087   2.1342
ADVA:  (17051,) (33895,) 0.5030535477209028
ADV1:  0.007382777573935677 0.008978204082542249 0.17529242959139685 2.214433250427246 -2.188882770140968
ADVB:  (21201,) (33895,) 0.6254904853223189
ADV2:  0.08546787651983724 0.22741127134756 0.47608034021413775 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0914   0.0297   0.1521  23.0599   8.5277   7.0176
***** Episode 9339, Mean R = -89.9  Std R = 38.9  Min R = -161.4
PolicyLoss: 2.27
Policy_Entropy: 0.0309
Policy_KL: 0.00532
Policy_SD: 0.888
Steps: 1.11e+04
TotalSteps: 2.

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1780   0.0577   0.3356  23.0599   8.5277   7.0176
***** Episode 9556, Mean R = -93.3  Std R = 35.6  Min R = -186.3
PolicyLoss: 2.1
Policy_Entropy: 0.0316
Policy_KL: 0.00699
Policy_SD: 0.888
Steps: 1.12e+04
TotalSteps: 2.99e+06
VF_0_ExplainedVarNew: 0.626
VF_0_ExplainedVarOld: 0.47
VF_0_Loss : 0.0567


Dynamics: Max Disturbance (m/s^2):  [0.00127453 0.00129553 0.00142314] 0.0023082815838242885
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0140   0.0036   0.0210   5.5684   2.2087   2.1342
ADVA:  (18484,) (34090,) 0.5422117923144617
ADV1:  0.0 -0.0026785410242651814 0.16397932743559188 2.183916447957357 -2.3623661116759003
ADVB:  (18398,) (34090,) 0.53968905837489
ADV2:  0.0223301298637708 0.18573317380969728 0.4488883246050427 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2757   0.0983   0.5437  23.0599   8.5277   7.0176
***** Episode 9587, Mean R = -94.0  Std R = 39.7  Min R = -186.6
PolicyLoss: 2.12
Policy_Entro

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.3234   2.1353   7.9533  23.0599   8.5277   7.0176
***** Episode 9804, Mean R = -79.1  Std R = 31.8  Min R = -156.2
PolicyLoss: 2.36
Policy_Entropy: 0.0333
Policy_KL: 0.00673
Policy_SD: 0.879
Steps: 1.17e+04
TotalSteps: 3.08e+06
VF_0_ExplainedVarNew: 0.715
VF_0_ExplainedVarOld: 0.442
VF_0_Loss : 0.0236


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0170   0.0053   0.0289   5.5684   2.2087   2.1342
ADVA:  (23049,) (34157,) 0.6747957958837134
ADV1:  0.0029636224750338586 0.007177164312197044 0.13678579591361006 2.187223258969816 -2.4141663701904386
ADVB:  (15910,) (34157,) 0.4657903211640367
ADV2:  0.0 0.17697380923444805 0.4520792988140415 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3747   0.6748   2.9206  23.0599   8.5277   7.0176
***** Episode 9835, Mean R = -79.4  Std R = 28.0  Min R = -143.2
PolicyLoss: 2.28
Policy_Entropy: 0.034
Policy_KL: 0.00741
Policy_SD: 0.878
Steps: 1.11e+04
TotalSteps: 3.09e+06
VF_0_

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3171   0.1609   0.6343  23.0599   8.5277   7.0176
***** Episode 10052, Mean R = -78.0  Std R = 36.0  Min R = -153.5
PolicyLoss: 2.25
Policy_Entropy: 0.0342
Policy_KL: 0.00776
Policy_SD: 0.875
Steps: 1.14e+04
TotalSteps: 3.17e+06
VF_0_ExplainedVarNew: 0.784
VF_0_ExplainedVarOld: 0.709
VF_0_Loss : 0.0336


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0097   0.0066   0.0270   5.5684   2.2087   2.1342
ADVA:  (18501,) (34272,) 0.539828431372549
ADV1:  0.0 -0.005205353021997169 0.09513866591507332 1.324453297759432 -2.0359579411087823
ADVB:  (16512,) (34272,) 0.48179271708683474
ADV2:  0.0 0.20284605677144227 0.41213604911590196 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3247   0.1495   0.6960  23.0599   8.5277   7.0176
***** Episode 10083, Mean R = -65.9  Std R = 22.0  Min R = -153.9
PolicyLoss: 2.57
Policy_Entropy: 0.0344
Policy_KL: 0.00579
Policy_SD: 0.872
Steps: 1.15e+04
TotalSteps: 3.18e+06
VF_0_ExplainedVarN

***** Episode 10300, Mean R = -76.6  Std R = 31.7  Min R = -153.8
PolicyLoss: 2.1
Policy_Entropy: 0.0347
Policy_KL: 0.00611
Policy_SD: 0.873
Steps: 1.13e+04
TotalSteps: 3.26e+06
VF_0_ExplainedVarNew: 0.603
VF_0_ExplainedVarOld: 0.44
VF_0_Loss : 0.0266


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0157   0.0075   0.0366   5.5684   2.2087   2.1342
ADVA:  (18471,) (34216,) 0.5398351648351648
ADV1:  0.009419615720740487 0.009313546084307766 0.11513144404319779 1.7932902334403176 -2.382672148346903
ADVB:  (22758,) (34216,) 0.6651274257657236
ADV2:  0.14238799603717775 0.2650566203021084 0.447063729694479 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2593   0.1104   0.5426  23.0599   8.5277   7.0176
***** Episode 10331, Mean R = -69.4  Std R = 34.6  Min R = -184.8
PolicyLoss: 2.44
Policy_Entropy: 0.0348
Policy_KL: 0.00366
Policy_SD: 0.874
Steps: 1.14e+04
TotalSteps: 3.27e+06
VF_0_ExplainedVarNew: 0.782
VF_0_ExplainedVarOld: 0.684
VF_0_Loss : 0.0212


ValFun  Gradients

w        |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.08   -0.08   -0.10 |    0.09    0.09    0.06
a_f      |   -0.05   -0.17 |    0.68    1.81 |   -1.52   -3.13 |    1.40    3.11
w_f      |    0.01    0.00    0.00 |    0.03    0.03    0.02 |   -0.07   -0.07   -0.04 |    0.08    0.09    0.06
w_rewards |   -0.07 |    0.27 |   -3.39 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |   -8.23 |   18.54 |  -50.00 |    0.00
theta_cv |    0.50 |    0.29 |    0.00 |    3.11
seeker_angles |    0.01    0.01 |    0.19    0.19 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0101  0.0076 |  0.1924  0.1939 | -0.9996 -0.9996 |  0.9985  0.9995
optical_flow |  0.0008  0.0006 |  0.0230  0.0234 | -0.9177 -1.2275 |  0.9026  0.6947
v_err    | -0.0133 |  0.0715 | -0.5084 |  0.4689
landing_rewards |    1.10 |    3.12 |    0.00 |   10.00
landing_margin |    7.64 |   25.83 |   -0.07 |  302.54
tracking_rewards |  -56.60 |   19.76 | -144.57 |  -28.91
steps    |     369 |  

Update Cnt = 350    ET =   1317.3   Stats:  Mean, Std, Min, Max
r_f      |  -11.37   -5.33    6.86 |  194.26  170.53  203.89 | -404.70 -389.44 -421.35 |  397.54  376.41  375.72
v_f      |    0.00   -0.01   -0.00 |    0.14    0.13    0.13 |   -0.69   -0.81   -0.76 |    0.57    0.62    0.51
r_i      |  -36.65  -21.01   23.33 |  689.86  639.25  775.95 |-1266.29-1299.21-1324.50 | 1264.49 1316.60 1327.77
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    7.27 |   14.65 |    0.07 |  125.43
norm_vf  |    0.19 |    0.14 |    0.02 |    1.04
gs_f     |    1.12 |    1.51 |    0.01 |   18.79
thrust   |    0.01    0.00   -0.00 |    0.92    0.87    0.89 |   -3.39   -3.45   -3.38 |    3.45    3.40    3.39
norm_thrust |    1.39 |    0.69 |    0.00 |    3.46
fuel     |    3.62 |    0.34 |    2.68 |    4.67
rewards  |  -74.00 |   33.82 | -176.08 |  -25.65
fuel_rewards |  -10.37 |    0.96 |  -13.35 |   -7.67
glideslope_rewards | 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1573   0.0529   0.2576  23.0599   8.5277   7.0176
***** Episode 11168, Mean R = -60.7  Std R = 17.4  Min R = -120.0
PolicyLoss: 2.7
Policy_Entropy: 0.0382
Policy_KL: 0.00588
Policy_SD: 0.861
Steps: 1.15e+04
TotalSteps: 3.58e+06
VF_0_ExplainedVarNew: 0.62
VF_0_ExplainedVarOld: 0.42
VF_0_Loss : 0.0168


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0088   0.0040   0.0171   5.5684   2.2087   2.1342
ADVA:  (19512,) (34701,) 0.562289271202559
ADV1:  0.0062438980190815535 0.004965372258231871 0.0732523870127878 1.5354815180832824 -2.4134604196219813
ADVB:  (20976,) (34701,) 0.6044782571107461
ADV2:  0.12944878810079272 0.27890407290053704 0.41996458400969633 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3936   0.1782   0.7468  23.0599   8.5277   7.0176
***** Episode 11199, Mean R = -61.0  Std R = 27.6  Min R = -176.8
PolicyLoss: 2.78
Policy_Entropy: 0.0383
Policy_KL: 0.00691
Policy_SD: 0.857
Steps: 1.17e+04
TotalSteps

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3018   0.1398   0.5460  23.0599   8.5277   7.0176
***** Episode 11416, Mean R = -64.6  Std R = 23.6  Min R = -145.6
PolicyLoss: 2.26
Policy_Entropy: 0.04
Policy_KL: 0.00537
Policy_SD: 0.848
Steps: 1.17e+04
TotalSteps: 3.67e+06
VF_0_ExplainedVarNew: 0.588
VF_0_ExplainedVarOld: 0.364
VF_0_Loss : 0.023


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0044   0.0011   0.0058   5.5684   2.2087   2.1342
ADVA:  (16867,) (34411,) 0.49016302926389815
ADV1:  0.0 0.00041791137831900983 0.08450921384923867 1.3224013273873678 -2.2192296221137653
ADVB:  (19324,) (34411,) 0.5615646159658249
ADV2:  0.056356275305132375 0.24410740825321578 0.42798129817047303 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1660   0.0575   0.2850  23.0599   8.5277   7.0176
***** Episode 11447, Mean R = -56.8  Std R = 18.3  Min R = -112.1
PolicyLoss: 2.56
Policy_Entropy: 0.0406
Policy_KL: 0.00448
Policy_SD: 0.844
Steps: 1.14e+04
TotalSteps: 3.69e+06
V

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5788   0.3245   1.2978  23.0599   8.5277   7.0176
***** Episode 11664, Mean R = -65.3  Std R = 28.1  Min R = -136.2
PolicyLoss: 2.57
Policy_Entropy: 0.0432
Policy_KL: 0.00568
Policy_SD: 0.845
Steps: 1.14e+04
TotalSteps: 3.77e+06
VF_0_ExplainedVarNew: 0.759
VF_0_ExplainedVarOld: 0.707
VF_0_Loss : 0.0287


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0100   0.0031   0.0153   5.5684   2.2087   2.1342
ADVA:  (19235,) (34468,) 0.5580538470465359
ADV1:  0.0 0.0015111013302117715 0.08904355512104437 2.061532731272251 -1.6844323258450755
ADVB:  (17279,) (34468,) 0.5013055587791575
ADV2:  0.001194927542943347 0.20648304488745278 0.4241973884318892 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3771   0.8433   2.5956  23.0599   8.5277   7.0176
***** Episode 11695, Mean R = -68.0  Std R = 32.9  Min R = -153.0
PolicyLoss: 2.38
Policy_Entropy: 0.0433
Policy_KL: 0.00677
Policy_SD: 0.845
Steps: 1.15e+04
TotalSteps: 3.78e+06
VF

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0301   0.5527   2.0915  40.3910  14.8004  11.7810
***** Episode 11912, Mean R = -57.3  Std R = 24.1  Min R = -143.7
PolicyLoss: 2.7
Policy_Entropy: 0.0433
Policy_KL: 0.0127
Policy_SD: 0.839
Steps: 1.14e+04
TotalSteps: 3.86e+06
VF_0_ExplainedVarNew: 0.788
VF_0_ExplainedVarOld: 0.758
VF_0_Loss : 0.0478


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0245   0.0106   0.0411   5.5684   2.2087   2.1342
ADVA:  (17516,) (34881,) 0.5021645021645021
ADV1:  0.0021459270547279363 0.0036536715629415257 0.0667875550709077 1.49022447903951 -2.0213275490594267
ADVB:  (21108,) (34881,) 0.6051432011696912
ADV2:  0.10295726172504456 0.2772505695840198 0.454421940128749 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6696   0.3014   1.4955  40.3910  14.8004  11.7810
***** Episode 11943, Mean R = -57.8  Std R = 23.4  Min R = -156.9
PolicyLoss: 2.65
Policy_Entropy: 0.0435
Policy_KL: 0.00958
Policy_SD: 0.838
Steps: 1.18e+04
TotalSteps: 

***** Episode 12160, Mean R = -54.6  Std R = 15.6  Min R = -99.3
PolicyLoss: 2.34
Policy_Entropy: 0.0442
Policy_KL: 0.00458
Policy_SD: 0.835
Steps: 1.16e+04
TotalSteps: 3.95e+06
VF_0_ExplainedVarNew: 0.506
VF_0_ExplainedVarOld: 0.333
VF_0_Loss : 0.0318


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0070   0.0043   0.0148   5.5684   2.2087   2.1342
ADVA:  (17628,) (34473,) 0.5113567139500479
ADV1:  0.0 -0.0005690578213824008 0.06475544670236646 1.1763195054066036 -1.9339206457437133
ADVB:  (17997,) (34473,) 0.5220607431903228
ADV2:  0.03364270367781776 0.27619495908678043 0.46703566736469126 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2261   0.0715   0.3993  40.3910  14.8004  11.7810
***** Episode 12191, Mean R = -55.6  Std R = 20.6  Min R = -133.9
PolicyLoss: 3.02
Policy_Entropy: 0.0441
Policy_KL: 0.00397
Policy_SD: 0.833
Steps: 1.14e+04
TotalSteps: 3.96e+06
VF_0_ExplainedVarNew: 0.752
VF_0_ExplainedVarOld: 0.61
VF_0_Loss : 0.0452


ValFun  Gradients: u/sd/Max/

w        |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.08   -0.09   -0.09 |    0.09    0.09    0.05
a_f      |   -0.00   -0.01 |    0.70    1.77 |   -1.37   -3.13 |    1.47    3.14
w_f      |    0.01   -0.00    0.00 |    0.03    0.03    0.02 |   -0.08   -0.08   -0.05 |    0.09    0.09    0.05
w_rewards |   -0.11 |    0.33 |   -3.39 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |   -3.39 |   12.57 |  -50.00 |    0.00
theta_cv |    0.48 |    0.29 |    0.00 |    3.06
seeker_angles |    0.02    0.00 |    0.17    0.16 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0156  0.0029 |  0.1650  0.1613 | -0.9993 -0.9996 |  0.9960  0.9995
optical_flow |  0.0009  0.0003 |  0.0237  0.0243 | -0.8485 -1.1637 |  0.9385  1.2922
v_err    | -0.0134 |  0.0683 | -0.5094 |  0.3099
landing_rewards |    2.32 |    4.22 |    0.00 |   10.00
landing_margin |    2.97 |   11.22 |   -0.08 |  111.53
tracking_rewards |  -48.95 |   17.93 | -128.85 |  -23.79
steps    |     373 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1678   0.0548   0.3136  40.3910  14.8004  11.7810
Update Cnt = 410    ET =   1315.6   Stats:  Mean, Std, Min, Max
r_f      |   -1.70    8.24   13.94 |  187.85  176.47  198.16 | -392.46 -589.31 -406.06 |  398.46  387.81  411.11
v_f      |    0.01   -0.02   -0.00 |    0.11    0.11    0.09 |   -0.24   -0.48   -0.57 |    0.72    0.67    0.31
r_i      |   -8.64   77.79   81.55 |  696.93  636.71  764.87 |-1334.90-1312.48-1367.79 | 1353.56 1345.03 1331.79
v_i      |    0.00   -0.00   -0.01 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.08    0.10
norm_rf  |    5.76 |   33.11 |    0.12 |  470.05
norm_vf  |    0.15 |    0.11 |    0.02 |    0.99
gs_f     |    1.17 |    1.83 |    0.02 |   16.71
thrust   |    0.01   -0.00    0.00 |    0.90    0.87    0.86 |   -3.46   -3.42   -3.44 |    3.45    3.40    3.42
norm_thrust |    1.35 |    0.69 |    0.00 |    3.46
fuel     |    3.18 |    0.36 |    1.65 |    4.26
rewards  |  -54.38 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0990   0.0345   0.1890  40.3910  14.8004  11.7810
***** Episode 13028, Mean R = -47.2  Std R = 23.3  Min R = -159.8
PolicyLoss: 1.5
Policy_Entropy: 0.0504
Policy_KL: 0.00511
Policy_SD: 0.797
Steps: 1.15e+04
TotalSteps: 4.28e+06
VF_0_ExplainedVarNew: 0.892
VF_0_ExplainedVarOld: 0.844
VF_0_Loss : 0.0415


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0091   0.0045   0.0178   5.5684   2.2087   2.1342
ADVA:  (19088,) (34503,) 0.5532272556009622
ADV1:  0.0013210635982296715 -0.000273570070919107 0.1033052487475232 1.2165107409159341 -4.255842326490456
ADVB:  (19670,) (34503,) 0.570095354027186
ADV2:  0.051857051505443605 0.15770476683680096 0.29460914976372743 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2112   0.0765   0.4345  40.3910  14.8004  11.7810
***** Episode 13059, Mean R = -53.7  Std R = 28.3  Min R = -150.9
PolicyLoss: 1.53
Policy_Entropy: 0.0503
Policy_KL: 0.00579
Policy_SD: 0.793
Steps: 1.15e+04
TotalSt

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3649   0.1858   0.8564  40.3910  14.8004  11.7810
***** Episode 13276, Mean R = -50.3  Std R = 22.5  Min R = -146.2
PolicyLoss: 3.19
Policy_Entropy: 0.0527
Policy_KL: 0.00634
Policy_SD: 0.788
Steps: 1.15e+04
TotalSteps: 4.37e+06
VF_0_ExplainedVarNew: 0.851
VF_0_ExplainedVarOld: 0.794
VF_0_Loss : 0.0611


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0041   0.0022   0.0081   5.5684   2.2087   2.1342
ADVA:  (18613,) (35069,) 0.53075365707605
ADV1:  0.0 -0.00028000781232416803 0.04212102050823472 0.8277501046713643 -0.9927783422361416
ADVB:  (18209,) (35069,) 0.5192335110781602
ADV2:  0.02857253789600136 0.27700797167798136 0.43141550832605974 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1958   0.0721   0.3648  40.3910  14.8004  11.7810
***** Episode 13307, Mean R = -49.3  Std R = 22.7  Min R = -148.8
PolicyLoss: 2.92
Policy_Entropy: 0.0535
Policy_KL: 0.00618
Policy_SD: 0.787
Steps: 1.17e+04
TotalSteps: 4.38e+06
V

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1637   0.0573   0.2927  40.3910  14.8004  11.7810
***** Episode 13524, Mean R = -44.1  Std R = 13.7  Min R = -84.2
PolicyLoss: 2.88
Policy_Entropy: 0.0564
Policy_KL: 0.00502
Policy_SD: 0.781
Steps: 1.15e+04
TotalSteps: 4.46e+06
VF_0_ExplainedVarNew: 0.693
VF_0_ExplainedVarOld: 0.646
VF_0_Loss : 0.0491


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0021   0.0081   5.5684   2.2087   2.1342
ADVA:  (19120,) (34641,) 0.5519471146906845
ADV1:  0.0 -0.0021541941956427562 0.039376301607437963 0.8554595629260889 -0.928882856015151
ADVB:  (16186,) (34641,) 0.4672497907104298
ADV2:  0.0 0.2379521448171731 0.4186883536446636 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2675   0.1249   0.5924  40.3910  14.8004  11.7810
***** Episode 13555, Mean R = -44.7  Std R = 16.6  Min R = -84.0
PolicyLoss: 2.73
Policy_Entropy: 0.0567
Policy_KL: 0.00684
Policy_SD: 0.778
Steps: 1.16e+04
TotalSteps: 4.47e+06
VF_0_ExplainedVarNew

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1279   0.0386   0.2022  40.3910  14.8004  11.7810
***** Episode 13772, Mean R = -41.3  Std R = 14.0  Min R = -74.4
PolicyLoss: 2.51
Policy_Entropy: 0.058
Policy_KL: 0.00347
Policy_SD: 0.756
Steps: 1.16e+04
TotalSteps: 4.55e+06
VF_0_ExplainedVarNew: 0.684
VF_0_ExplainedVarOld: 0.61
VF_0_Loss : 0.0133


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0039   0.0020   0.0077   5.5684   2.2087   2.1342
ADVA:  (18541,) (34523,) 0.5370622483561683
ADV1:  8.162298375238852e-05 0.0037864512765080796 0.08669192209406062 1.491476253703349 -1.6498239064216635
ADVB:  (19145,) (34523,) 0.5545578310112099
ADV2:  0.08750445442584502 0.3153115484660681 0.48387570505868416 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1698   0.0755   0.3877  40.3910  14.8004  11.7810
***** Episode 13803, Mean R = -41.2  Std R = 14.8  Min R = -80.7
PolicyLoss: 3.05
Policy_Entropy: 0.058
Policy_KL: 0.00444
Policy_SD: 0.76
Steps: 1.16e+04
TotalSteps: 

v_err    | -0.0112 |  0.0646 | -0.5040 |  0.4458
landing_rewards |    4.55 |    4.98 |    0.00 |   10.00
landing_margin |    1.59 |   23.11 |   -0.08 |  405.46
tracking_rewards |  -38.71 |   14.88 | -151.43 |  -18.94
steps    |     373 |      23 |     225 |     439
***** Episode 14020, Mean R = -41.9  Std R = 17.9  Min R = -96.6
PolicyLoss: 2.56
Policy_Entropy: 0.061
Policy_KL: 0.00803
Policy_SD: 0.748
Steps: 1.17e+04
TotalSteps: 4.64e+06
VF_0_ExplainedVarNew: 0.445
VF_0_ExplainedVarOld: 0.426
VF_0_Loss : 0.0293


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0056   0.0031   0.0133   5.5684   2.2087   2.1342
ADVA:  (16942,) (34999,) 0.48407097345638445
ADV1:  0.0010441540048908848 0.002716922171435422 0.04024667304444757 0.530547058830271 -0.9045131682421997
ADVB:  (20566,) (34999,) 0.5876167890511157
ADV2:  0.13184517665217443 0.3451192682038507 0.4997192621242126 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2369   0.0961   0.3708  40.3910  14.8004  11.7810
****

w        |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.07   -0.08   -0.06 |    0.10    0.06    0.08
a_f      |    0.07    0.03 |    0.64    1.78 |   -1.36   -3.12 |    1.48    3.14
w_f      |    0.01   -0.01    0.00 |    0.03    0.03    0.02 |   -0.07   -0.08   -0.06 |    0.10    0.06    0.08
w_rewards |   -0.07 |    0.19 |   -1.57 |    0.00
w_penalty |   -0.32 |    5.67 | -100.00 |    0.00
fov_penalty |   -0.48 |    4.89 |  -50.00 |    0.00
theta_cv |    0.44 |    0.28 |    0.00 |    2.73
seeker_angles |   -0.00    0.00 |    0.13    0.14 |   -0.98   -1.00 |    1.00    0.99
cs_angles | -0.0013  0.0035 |  0.1308  0.1365 | -0.9839 -0.9983 |  0.9987  0.9943
optical_flow |  0.0008  0.0004 |  0.0240  0.0250 | -0.9636 -0.8180 |  1.1889  1.2351
v_err    | -0.0111 |  0.0650 | -0.5005 |  0.2052
landing_rewards |    4.29 |    4.95 |    0.00 |   10.00
landing_margin |    0.51 |    4.95 |   -0.08 |   66.73
tracking_rewards |  -39.63 |   14.27 | -117.34 |  -20.17
steps    |     373 |  

Update Cnt = 470    ET =   1233.9   Stats:  Mean, Std, Min, Max
r_f      |   -2.82   -2.93   -0.63 |  187.53  174.57  197.12 | -422.99 -375.54 -384.53 |  425.21  401.05  389.35
v_f      |   -0.01   -0.01   -0.01 |    0.07    0.08    0.08 |   -0.43   -0.35   -0.34 |    0.23    0.69    0.30
r_i      |   55.83   16.10   16.15 |  709.87  666.67  725.23 |-1307.73-1337.82-1246.34 | 1365.91 1320.93 1387.45
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.10    0.10
norm_rf  |    1.73 |    6.48 |    0.08 |   64.35
norm_vf  |    0.11 |    0.07 |    0.02 |    0.70
gs_f     |    1.13 |    1.50 |    0.01 |   13.10
thrust   |    0.00    0.00   -0.01 |    0.83    0.80    0.80 |   -3.38   -3.38   -3.34 |    3.43    3.30    3.33
norm_thrust |    1.22 |    0.69 |    0.00 |    3.46
fuel     |    2.58 |    0.37 |    1.83 |    4.22
rewards  |  -41.75 |   22.44 | -185.10 |  -15.56
fuel_rewards |   -7.39 |    1.07 |  -12.08 |   -5.25
glideslope_rewards | 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3781   0.1719   0.8233  40.3910  14.8004  11.7810
***** Episode 14888, Mean R = -40.2  Std R = 20.4  Min R = -99.9
PolicyLoss: 3.46
Policy_Entropy: 0.0743
Policy_KL: 0.00733
Policy_SD: 0.726
Steps: 1.15e+04
TotalSteps: 4.97e+06
VF_0_ExplainedVarNew: 0.91
VF_0_ExplainedVarOld: 0.609
VF_0_Loss : 0.0198


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0014   0.0051   5.5684   2.2087   2.1342
ADVA:  (19870,) (34641,) 0.5735977598799111
ADV1:  0.002013167762863699 0.0016574210808817402 0.042942381700061705 0.42336919177498994 -1.6682435917355853
ADVB:  (18308,) (34641,) 0.5285066828324817
ADV2:  0.05775619626148017 0.3506929884547011 0.530308241767812 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2168   0.0737   0.3989  40.3910  14.8004  11.7810
***** Episode 14919, Mean R = -33.6  Std R = 12.1  Min R = -66.9
PolicyLoss: 3.3
Policy_Entropy: 0.0756
Policy_KL: 0.00635
Policy_SD: 0.721
Steps: 1.16e+04
TotalSteps

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4431   0.1645   0.7933  40.3910  14.8004  11.7810
***** Episode 15136, Mean R = -40.6  Std R = 17.4  Min R = -95.8
PolicyLoss: 3.41
Policy_Entropy: 0.0776
Policy_KL: 0.00626
Policy_SD: 0.712
Steps: 1.17e+04
TotalSteps: 5.06e+06
VF_0_ExplainedVarNew: 0.81
VF_0_ExplainedVarOld: 0.798
VF_0_Loss : 0.0195


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0033   5.5684   2.2087   2.1342
ADVA:  (17085,) (34683,) 0.49260444598218145
ADV1:  0.0 -0.0006146043074317831 0.025344266870193703 0.2330191145340602 -0.4171453941459711
ADVB:  (16842,) (34683,) 0.48559813164951127
ADV2:  0.0 0.35749008112818037 0.5450879447547655 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7359   0.3960   1.6614  40.3910  14.8004  11.7810
***** Episode 15167, Mean R = -40.6  Std R = 14.5  Min R = -74.3
PolicyLoss: 3.62
Policy_Entropy: 0.078
Policy_KL: 0.00663
Policy_SD: 0.717
Steps: 1.16e+04
TotalSteps: 5.07e+06
VF_0_ExplainedVarN

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3246   0.1202   0.6288  40.3910  14.8004  11.7810
***** Episode 15384, Mean R = -38.7  Std R = 15.2  Min R = -81.1
PolicyLoss: 3.2
Policy_Entropy: 0.0818
Policy_KL: 0.0054
Policy_SD: 0.688
Steps: 1.19e+04
TotalSteps: 5.15e+06
VF_0_ExplainedVarNew: 0.712
VF_0_ExplainedVarOld: 0.695
VF_0_Loss : 0.0144


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0057   0.0030   0.0112   5.5684   2.2087   2.1342
ADVA:  (17077,) (35050,) 0.48721825962910126
ADV1:  0.0 -0.0009135437060062374 0.0322899652642697 0.5301252122353121 -0.9814518576573903
ADVB:  (18923,) (35050,) 0.539885877318117
ADV2:  0.06079168000466084 0.30404483721949743 0.42464733575599667 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2396   0.0915   0.4131  40.3910  14.8004  11.7810
***** Episode 15415, Mean R = -39.5  Std R = 29.3  Min R = -191.4
PolicyLoss: 2.74
Policy_Entropy: 0.0814
Policy_KL: 0.00492
Policy_SD: 0.687
Steps: 1.16e+04
TotalSteps: 5.17e+06
VF_0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.0607   2.4884   8.4567  40.3910  14.8004  11.7810
***** Episode 15632, Mean R = -32.9  Std R = 13.6  Min R = -69.9
PolicyLoss: 3.87
Policy_Entropy: 0.0834
Policy_KL: 0.00634
Policy_SD: 0.683
Steps: 1.16e+04
TotalSteps: 5.25e+06
VF_0_ExplainedVarNew: 0.869
VF_0_ExplainedVarOld: 0.866
VF_0_Loss : 0.017


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0017   0.0066   5.5684   2.2087   2.1342
ADVA:  (18689,) (34813,) 0.5368396863240743
ADV1:  0.0015024540723879987 0.0012191161003906787 0.0212800143698085 0.34973951122111013 -0.24911824443601027
ADVB:  (19300,) (34813,) 0.5543906012121909
ADV2:  0.13143957488124886 0.4010973766130779 0.5360637461122952 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6659   0.8046   3.7580  40.3910  14.8004  11.7810
***** Episode 15663, Mean R = -32.0  Std R = 11.6  Min R = -66.5
PolicyLoss: 3.48
Policy_Entropy: 0.0835
Policy_KL: 0.00538
Policy_SD: 0.677
Steps: 1.16e+04
TotalSte

***** Episode 15880, Mean R = -30.3  Std R = 13.9  Min R = -66.2
PolicyLoss: 2.95
Policy_Entropy: 0.0891
Policy_KL: 0.00421
Policy_SD: 0.657
Steps: 1.17e+04
TotalSteps: 5.34e+06
VF_0_ExplainedVarNew: 0.887
VF_0_ExplainedVarOld: 0.881
VF_0_Loss : 0.0196


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0033   5.5684   2.2087   2.1342
ADVA:  (18889,) (34621,) 0.5455937147973773
ADV1:  0.0017380332144960736 0.002433753505267196 0.01918513739708922 0.19095653499019719 -0.1933047430674772
ADVB:  (19126,) (34621,) 0.5524392709627105
ADV2:  0.13477189476253218 0.4179766159838093 0.5724818364268573 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5319   0.2244   1.0636  40.3910  14.8004  11.7810
***** Episode 15911, Mean R = -31.7  Std R = 11.8  Min R = -59.4
PolicyLoss: 3.56
Policy_Entropy: 0.0897
Policy_KL: 0.00481
Policy_SD: 0.66
Steps: 1.15e+04
TotalSteps: 5.35e+06
VF_0_ExplainedVarNew: 0.891
VF_0_ExplainedVarOld: 0.884
VF_0_Loss : 0.0181


ValFun  Gradie

attitude |   -0.10    0.04   -0.03 |    1.22    0.70    1.85 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.07   -0.05 |    0.08    0.06    0.07
a_f      |    0.04   -0.07 |    0.67    1.82 |   -1.50   -3.14 |    1.43    3.14
w_f      |    0.01   -0.00    0.00 |    0.03    0.03    0.02 |   -0.06   -0.07   -0.05 |    0.08    0.06    0.07
w_rewards |   -0.03 |    0.09 |   -0.66 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |   -0.16 |    2.84 |  -50.00 |    0.00
theta_cv |    0.38 |    0.27 |    0.00 |    1.88
seeker_angles |    0.00    0.00 |    0.11    0.11 |   -0.99   -0.99 |    0.99    1.00
cs_angles |  0.0038  0.0026 |  0.1077  0.1128 | -0.9915 -0.9947 |  0.9888  0.9978
optical_flow |  0.0007 -0.0001 |  0.0234  0.0263 | -0.9879 -1.2212 |  0.9835  1.2182
v_err    | -0.0089 |  0.0624 | -0.5000 |  0.2283
landing_rewards |    6.58 |    4.74 |    0.00 |   10.00
landing_margin |    0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0257   0.5751   2.1057  40.3910  14.8004  11.7810
Update Cnt = 530    ET =   1185.6   Stats:  Mean, Std, Min, Max
r_f      |   -0.45   11.64   -0.74 |  187.52  162.82  205.34 | -392.85 -385.17 -391.80 |  395.56  382.08  413.04
v_f      |    0.00   -0.01   -0.00 |    0.06    0.05    0.06 |   -0.15   -0.12   -0.20 |    0.16    0.16    0.17
r_i      |    4.42   70.94    1.02 |  701.27  630.87  771.26 |-1346.63-1332.72-1265.12 | 1342.34 1274.46 1337.29
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.09    0.10
norm_rf  |    0.67 |    2.15 |    0.06 |   37.97
norm_vf  |    0.09 |    0.03 |    0.02 |    0.23
gs_f     |    1.29 |    2.18 |    0.01 |   30.45
thrust   |    0.01    0.00    0.00 |    0.76    0.75    0.74 |   -3.42   -3.41   -3.43 |    3.29    3.43    3.46
norm_thrust |    1.08 |    0.72 |    0.00 |    3.46
fuel     |    2.05 |    0.32 |    1.49 |    3.77
rewards  |  -29.22 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4892   0.1928   0.8883  40.3910  14.8004  11.7810
***** Episode 16748, Mean R = -27.2  Std R = 10.3  Min R = -54.0
PolicyLoss: 3.15
Policy_Entropy: 0.103
Policy_KL: 0.00441
Policy_SD: 0.636
Steps: 1.17e+04
TotalSteps: 5.67e+06
VF_0_ExplainedVarNew: 0.884
VF_0_ExplainedVarOld: 0.874
VF_0_Loss : 0.0274


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0043   5.5684   2.2087   2.1342
ADVA:  (17486,) (35036,) 0.4990866537275945
ADV1:  0.000597394176789192 0.000926707599617607 0.017064972907680845 0.12586731736427692 -0.17408968074049228
ADVB:  (20328,) (35036,) 0.5802032195456103
ADV2:  0.2088441736991103 0.45758330285266546 0.5716322904973093 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3464   0.1247   0.6017  40.3910  14.8004  11.7810
***** Episode 16779, Mean R = -27.1  Std R = 9.3  Min R = -60.8
PolicyLoss: 3.51
Policy_Entropy: 0.105
Policy_KL: 0.00393
Policy_SD: 0.629
Steps: 1.19e+04
TotalSteps

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7355   0.3227   1.3719  40.3910  14.8004  11.7810
***** Episode 16996, Mean R = -25.8  Std R = 9.6  Min R = -52.7
PolicyLoss: 2.91
Policy_Entropy: 0.107
Policy_KL: 0.00605
Policy_SD: 0.62
Steps: 1.15e+04
TotalSteps: 5.76e+06
VF_0_ExplainedVarNew: 0.885
VF_0_ExplainedVarOld: 0.882
VF_0_Loss : 0.021


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0017   5.5684   2.2087   2.1342
ADVA:  (18955,) (34936,) 0.542563544767575
ADV1:  0.0 -0.00030619336232689767 0.016448956482720427 0.14706326335668568 -0.1462587459663846
ADVB:  (18147,) (34936,) 0.5194355392718113
ADV2:  0.04356940199601664 0.3549700820513782 0.5056225467128211 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.6335   1.5375   5.3337  40.3910  14.8004  11.7810
***** Episode 17027, Mean R = -26.2  Std R = 6.6  Min R = -41.1
PolicyLoss: 2.99
Policy_Entropy: 0.108
Policy_KL: 0.00576
Policy_SD: 0.618
Steps: 1.18e+04
TotalSteps: 5.77e+06
VF_0_Exp

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4260   0.1762   0.7908  40.3910  14.8004  11.7810
***** Episode 17244, Mean R = -25.9  Std R = 8.6  Min R = -51.3
PolicyLoss: 3.16
Policy_Entropy: 0.111
Policy_KL: 0.00417
Policy_SD: 0.604
Steps: 1.17e+04
TotalSteps: 5.85e+06
VF_0_ExplainedVarNew: 0.889
VF_0_ExplainedVarOld: 0.886
VF_0_Loss : 0.0266


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0014   5.5684   2.2087   2.1342
ADVA:  (19141,) (34825,) 0.5496338837042355
ADV1:  0.0002561163528857005 0.00026895507061295605 0.017616182866246347 0.13385453919569656 -0.29483966166238457
ADVB:  (18343,) (34825,) 0.5267193108399139
ADV2:  0.07425451911041545 0.35321057954321605 0.4954561749599034 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5411   0.2837   1.2167  40.3910  14.8004  11.7810
***** Episode 17275, Mean R = -23.7  Std R = 6.3  Min R = -36.2
PolicyLoss: 2.92
Policy_Entropy: 0.111
Policy_KL: 0.00493
Policy_SD: 0.606
Steps: 1.16e+04
TotalSt

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2866   0.0977   0.5376  40.3910  14.8004  11.7810
***** Episode 17492, Mean R = -25.0  Std R = 7.5  Min R = -42.1
PolicyLoss: 3.23
Policy_Entropy: 0.114
Policy_KL: 0.0051
Policy_SD: 0.622
Steps: 1.15e+04
TotalSteps: 5.95e+06
VF_0_ExplainedVarNew: 0.925
VF_0_ExplainedVarOld: 0.923
VF_0_Loss : 0.023


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0031   5.5684   2.2087   2.1342
ADVA:  (19155,) (34548,) 0.5544459881903439
ADV1:  0.0018653172226022127 0.0009453849839769527 0.015932496532363886 0.11798396100600564 -0.12114149223177495
ADVB:  (21173,) (34548,) 0.6128574736598356
ADV2:  0.23621645786676193 0.44068383552230334 0.5526307762994564 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2672   0.0971   0.4897  40.3910  14.8004  11.7810
***** Episode 17523, Mean R = -22.0  Std R = 9.3  Min R = -45.0
PolicyLoss: 3.08
Policy_Entropy: 0.114
Policy_KL: 0.0046
Policy_SD: 0.62
Steps: 1.14e+04
TotalSteps: 

tracking_rewards |  -27.20 |    7.96 |  -79.49 |  -14.73
steps    |     375 |      20 |     333 |     416
***** Episode 17740, Mean R = -24.9  Std R = 9.5  Min R = -42.3
PolicyLoss: 3.03
Policy_Entropy: 0.117
Policy_KL: 0.00409
Policy_SD: 0.613
Steps: 1.17e+04
TotalSteps: 6.04e+06
VF_0_ExplainedVarNew: 0.9
VF_0_ExplainedVarOld: 0.896
VF_0_Loss : 0.021


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0010   0.0038   5.5684   2.2087   2.1342
ADVA:  (19018,) (34843,) 0.54581982033694
ADV1:  0.0016851272510371267 -0.00020077594596950993 0.017347375495225664 0.09665868642369993 -0.15852914172618046
ADVB:  (21099,) (34843,) 0.605544872714749
ADV2:  0.27445110382595345 0.44569253019784816 0.5258117734680432 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4253   0.1600   0.7849  40.3910  14.8004  11.7810
***** Episode 17771, Mean R = -22.5  Std R = 10.3  Min R = -46.2
PolicyLoss: 3.1
Policy_Entropy: 0.118
Policy_KL: 0.00426
Policy_SD: 0.601
Steps: 1.16e+04
TotalStep

attitude |   -0.18   -0.03    0.07 |    1.19    0.65    1.84 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.05 |    0.07    0.06    0.06
a_f      |   -0.02   -0.04 |    0.67    1.81 |   -1.46   -3.13 |    1.54    3.13
w_f      |    0.01    0.00    0.01 |    0.03    0.02    0.02 |   -0.05   -0.06   -0.04 |    0.07    0.06    0.06
w_rewards |   -0.01 |    0.04 |   -0.40 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.33 |    0.26 |    0.00 |    1.44
seeker_angles |    0.00    0.00 |    0.09    0.10 |   -0.99   -0.99 |    1.00    0.98
cs_angles |  0.0001  0.0037 |  0.0910  0.1006 | -0.9907 -0.9912 |  0.9996  0.9836
optical_flow |  0.0004 -0.0001 |  0.0250  0.0246 | -0.8683 -1.1183 |  1.2030  1.2841
v_err    | -0.0095 |  0.0601 | -0.4999 |  0.1580
landing_rewards |    7.13 |    4.52 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4106   0.1636   0.7597  40.3910  14.8004  11.7810
Update Cnt = 590    ET =   1158.6   Stats:  Mean, Std, Min, Max
r_f      |   -0.11  -15.09   -7.49 |  187.14  165.68  209.88 | -391.54 -390.22 -740.57 |  374.31  324.13  386.45
v_f      |    0.00   -0.00    0.00 |    0.05    0.06    0.06 |   -0.29   -0.71   -0.14 |    0.17    0.14    0.52
r_i      |   -3.09  -62.64  -16.51 |  712.69  647.46  751.84 |-1368.93-1253.50-1260.75 | 1314.95 1214.01 1359.90
v_i      |    0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.09    0.09    0.09
norm_rf  |    1.67 |   22.49 |    0.02 |  397.03
norm_vf  |    0.09 |    0.05 |    0.03 |    0.93
gs_f     |    1.11 |    1.33 |    0.01 |    9.53
thrust   |   -0.00   -0.00   -0.00 |    0.70    0.70    0.69 |   -3.26   -3.33   -3.42 |    3.39    3.33    3.36
norm_thrust |    0.97 |    0.72 |    0.00 |    3.46
fuel     |    1.77 |    0.26 |    1.24 |    2.85
rewards  |  -22.93 |

ADVA:  (19904,) (35018,) 0.5683933976811926
ADV1:  0.0005975447282551474 -0.0005039893188611594 0.015326789454669669 0.09583148866891861 -0.11970310615273838
ADVB:  (19475,) (35018,) 0.5561425552572963
ADV2:  0.13208944171418768 0.3879993954283595 0.5234094667944146 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5415   0.2000   0.9285  40.3910  14.8004  11.7810
***** Episode 18608, Mean R = -22.7  Std R = 7.9  Min R = -42.3
PolicyLoss: 2.87
Policy_Entropy: 0.127
Policy_KL: 0.00368
Policy_SD: 0.581
Steps: 1.18e+04
TotalSteps: 6.36e+06
VF_0_ExplainedVarNew: 0.916
VF_0_ExplainedVarOld: 0.911
VF_0_Loss : 0.0289


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0022   0.0087   5.5684   2.2087   2.1342
ADVA:  (18438,) (35189,) 0.5239705589814999
ADV1:  0.0030505485916927193 0.0017739764712313207 0.014410577998103698 0.09583148866891861 -0.11970310615273838
ADVB:  (23026,) (35189,) 0.6543522123390833
ADV2:  0.3671027026441752 0.5226936749197851 0.5861175136214913 3

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0018   5.5684   2.2087   2.1342
ADVA:  (21069,) (34724,) 0.6067561340859348
ADV1:  0.00249889702254269 0.0003755708677174967 0.014365820839175725 0.08970513603903996 -0.0901663910491075
ADVB:  (20892,) (34724,) 0.6016587950696924
ADV2:  0.2245236231784906 0.3926574412705788 0.4869610238790101 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3998   0.2037   0.8785  40.3910  14.8004  11.7810
***** Episode 18856, Mean R = -17.9  Std R = 6.6  Min R = -34.8
PolicyLoss: 2.66
Policy_Entropy: 0.129
Policy_KL: 0.00724
Policy_SD: 0.579
Steps: 1.15e+04
TotalSteps: 6.46e+06
VF_0_ExplainedVarNew: 0.892
VF_0_ExplainedVarOld: 0.886
VF_0_Loss : 0.0258


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0021   5.5684   2.2087   2.1342
ADVA:  (21719,) (34636,) 0.6270643261346576
ADV1:  0.002279769695977877 0.00031953294424420575 0.013903277667815336 0.08970513603903996 -0.08268823528213251
ADVB:  (203

***** Episode 19073, Mean R = -21.5  Std R = 6.6  Min R = -35.2
PolicyLoss: 2.14
Policy_Entropy: 0.132
Policy_KL: 0.00503
Policy_SD: 0.57
Steps: 1.17e+04
TotalSteps: 6.54e+06
VF_0_ExplainedVarNew: 0.901
VF_0_ExplainedVarOld: 0.897
VF_0_Loss : 0.036


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0067   0.0039   0.0150   5.5684   2.2087   2.1342
ADVA:  (19685,) (34906,) 0.5639431616341031
ADV1:  0.0020748731598526204 0.000694446446165913 0.013827327957526649 0.07502403220333836 -0.09194253145289787
ADVB:  (21164,) (34906,) 0.6063141007276686
ADV2:  0.22683058002568796 0.43258588510779794 0.5316136879233389 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.1083   0.9534   3.5255  40.3910  14.8004  11.7810
***** Episode 19104, Mean R = -20.4  Std R = 7.2  Min R = -36.2
PolicyLoss: 2.88
Policy_Entropy: 0.132
Policy_KL: 0.00478
Policy_SD: 0.569
Steps: 1.16e+04
TotalSteps: 6.55e+06
VF_0_ExplainedVarNew: 0.947
VF_0_ExplainedVarOld: 0.942
VF_0_Loss : 0.0344


ValFun  Gradient

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3215   0.1173   0.5573  40.3910  14.8004  11.7810
***** Episode 19321, Mean R = -20.2  Std R = 7.0  Min R = -37.8
PolicyLoss: 2.4
Policy_Entropy: 0.134
Policy_KL: 0.00431
Policy_SD: 0.569
Steps: 1.18e+04
TotalSteps: 6.63e+06
VF_0_ExplainedVarNew: 0.931
VF_0_ExplainedVarOld: 0.929
VF_0_Loss : 0.0322


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0019   0.0073   5.5684   2.2087   2.1342
ADVA:  (20881,) (35013,) 0.5963784879901751
ADV1:  0.002681732121051831 0.0008918136877600991 0.01285154707991367 0.08384854183588786 -0.08866195568844837
ADVB:  (22596,) (35013,) 0.6453602947476651
ADV2:  0.30017195495121285 0.43298763505023496 0.5050911562403774 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4693   0.1923   0.8599  40.3910  14.8004  11.7810
***** Episode 19352, Mean R = -17.5  Std R = 5.6  Min R = -32.1
PolicyLoss: 2.69
Policy_Entropy: 0.135
Policy_KL: 0.00494
Policy_SD: 0.568
Steps: 1.15e+04
TotalSteps:

cs_angles |  0.0008  0.0046 |  0.0853  0.0877 | -0.9909 -0.9997 |  0.9862  0.9937
optical_flow |  0.0001 -0.0002 |  0.0250  0.0249 | -1.1228 -0.9653 |  0.9103  1.1801
v_err    | -0.0094 |  0.0583 | -0.4522 |  0.1441
landing_rewards |    8.19 |    3.85 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.08 |    0.06
tracking_rewards |  -22.29 |    5.02 |  -38.49 |  -11.41
steps    |     375 |      21 |     333 |     417
***** Episode 19600, Mean R = -18.5  Std R = 6.5  Min R = -36.5
PolicyLoss: 2.15
Policy_Entropy: 0.137
Policy_KL: 0.00417
Policy_SD: 0.564
Steps: 1.15e+04
TotalSteps: 6.74e+06
VF_0_ExplainedVarNew: 0.913
VF_0_ExplainedVarOld: 0.91
VF_0_Loss : 0.0347


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0013   0.0047   5.5684   2.2087   2.1342
ADVA:  (22078,) (34833,) 0.633824247121982
ADV1:  0.0015067073961065013 -0.00020341940304282422 0.012609750972620754 0.07793452229337872 -0.13501791860370474
ADVB:  (20388,) (34833,) 0.5853070364309706
ADV2:  0.1

w        |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.05 |    0.06    0.05    0.05
a_f      |    0.02   -0.21 |    0.70    1.84 |   -1.55   -3.12 |    1.52    3.08
w_f      |    0.00    0.00    0.00 |    0.02    0.02    0.01 |   -0.05   -0.06   -0.04 |    0.06    0.05    0.04
w_rewards |   -0.00 |    0.01 |   -0.10 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.28 |    0.25 |    0.00 |    1.80
seeker_angles |    0.00    0.00 |    0.08    0.09 |   -0.99   -0.99 |    1.00    0.99
cs_angles |  0.0004  0.0012 |  0.0836  0.0862 | -0.9947 -0.9919 |  0.9955  0.9884
optical_flow |  0.0001 -0.0001 |  0.0238  0.0262 | -0.9294 -1.1822 |  1.2663  1.2336
v_err    | -0.0095 |  0.0586 | -0.4531 |  0.1375
landing_rewards |    8.45 |    3.62 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.06 |    0.06
tracking_rewards |  -21.87 |    4.86 |  -38.82 |  -12.75
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6643   0.3360   1.4571  40.3910  14.8004  11.7810
Update Cnt = 650    ET =   1443.5   Stats:  Mean, Std, Min, Max
r_f      |   -0.58   -9.01   12.29 |  188.27  166.86  202.50 | -394.92 -385.10 -387.78 |  398.74  398.98  384.36
v_f      |   -0.00    0.00   -0.00 |    0.04    0.05    0.05 |   -0.09   -0.13   -0.12 |    0.11    0.13    0.15
r_i      |   -8.36  -83.63   22.80 |  674.88  658.63  766.70 |-1307.94-1345.00-1311.90 | 1317.84 1280.56 1292.60
v_i      |    0.00    0.01   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.33 |    0.15 |    0.04 |    0.75
norm_vf  |    0.08 |    0.02 |    0.02 |    0.15
gs_f     |    1.38 |    2.16 |    0.01 |   21.99
thrust   |   -0.00    0.00   -0.00 |    0.68    0.68    0.67 |   -3.41   -3.38   -3.40 |    3.45    3.39    3.45
norm_thrust |    0.91 |    0.73 |    0.00 |    3.46
fuel     |    1.61 |    0.21 |    1.08 |    2.39
rewards  |  -18.24 |

ADVA:  (21596,) (35107,) 0.6151479761870852
ADV1:  0.0005869248325772649 -0.0005428120151290169 0.011716824120866852 0.07763631470664539 -0.10380771770594843
ADVB:  (19242,) (35107,) 0.5480958213461703
ADV2:  0.08210378066782029 0.308899435768655 0.45399862669956614 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3417   0.1500   0.6696  40.3910  14.8004  11.7810
***** Episode 20468, Mean R = -17.0  Std R = 6.4  Min R = -38.9
PolicyLoss: 2.16
Policy_Entropy: 0.146
Policy_KL: 0.00599
Policy_SD: 0.562
Steps: 1.17e+04
TotalSteps: 7.06e+06
VF_0_ExplainedVarNew: 0.937
VF_0_ExplainedVarOld: 0.935
VF_0_Loss : 0.0311


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0048   5.5684   2.2087   2.1342
ADVA:  (20958,) (34988,) 0.5990053732708357
ADV1:  0.00039867781533996227 -0.0007495195695535791 0.011304174098535345 0.0933855872046419 -0.10380771770594843
ADVB:  (19589,) (34988,) 0.5598776723448039
ADV2:  0.10295604740963175 0.31716162086423294 0.448216884961590

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0046   0.0027   0.0099   5.5684   2.2087   2.1342
ADVA:  (19781,) (34886,) 0.5670182881385083
ADV1:  0.0 -0.00166793037398725 0.011749951579289116 0.10496475412793776 -0.09811371788767204
ADVB:  (18658,) (34886,) 0.5348277245886602
ADV2:  0.0579286718362481 0.28076153208484633 0.4223817055025942 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8215   0.3546   1.3556  40.3910  14.8004  11.7810
***** Episode 20716, Mean R = -20.4  Std R = 7.9  Min R = -46.8
PolicyLoss: 1.99
Policy_Entropy: 0.149
Policy_KL: 0.00395
Policy_SD: 0.55
Steps: 1.16e+04
TotalSteps: 7.16e+06
VF_0_ExplainedVarNew: 0.939
VF_0_ExplainedVarOld: 0.932
VF_0_Loss : 0.0312


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0063   5.5684   2.2087   2.1342
ADVA:  (20838,) (35067,) 0.5942338951150654
ADV1:  0.00031041923013695044 -0.001152793392330008 0.012709107560146135 0.10496475412793776 -0.09727326941961259
ADVB:  (19491,) (35067,) 0

***** Episode 20933, Mean R = -17.5  Std R = 6.3  Min R = -39.5
PolicyLoss: 2.17
Policy_Entropy: 0.149
Policy_KL: 0.00617
Policy_SD: 0.56
Steps: 1.16e+04
TotalSteps: 7.24e+06
VF_0_ExplainedVarNew: 0.94
VF_0_ExplainedVarOld: 0.937
VF_0_Loss : 0.0332


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0041   0.0022   0.0090   5.5684   2.2087   2.1342
ADVA:  (20126,) (34993,) 0.5751436001486011
ADV1:  0.0015394716979781307 0.0005028083359279698 0.010690752296486913 0.06352748898557498 -0.07959180268748484
ADVB:  (22331,) (34993,) 0.6381562026691052
ADV2:  0.2470512677113827 0.39991846508485956 0.48704519671935903 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7191   0.2333   1.1905  40.3910  14.8004  11.7810
***** Episode 20964, Mean R = -15.0  Std R = 5.0  Min R = -27.4
PolicyLoss: 2.38
Policy_Entropy: 0.149
Policy_KL: 0.00845
Policy_SD: 0.559
Steps: 1.17e+04
TotalSteps: 7.25e+06
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.961
VF_0_Loss : 0.0393


ValFun  Gradien

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2484   0.5104   1.9237  40.3910  14.8004  11.7810
***** Episode 21181, Mean R = -17.1  Std R = 6.0  Min R = -37.9
PolicyLoss: 2.31
Policy_Entropy: 0.151
Policy_KL: 0.00487
Policy_SD: 0.561
Steps: 1.16e+04
TotalSteps: 7.33e+06
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.0326


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0006   0.0025   5.5684   2.2087   2.1342
ADVA:  (20356,) (34988,) 0.5817994741054076
ADV1:  0.0005515552746464536 -0.0003055004695234915 0.010647923627022824 0.07259924455023714 -0.09539206695635662
ADVB:  (19888,) (34988,) 0.5684234594718189
ADV2:  0.11897409641473283 0.33539011178501194 0.47694379133979015 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6746   0.3369   1.3911  40.3910  14.8004  11.7810
***** Episode 21212, Mean R = -15.8  Std R = 6.0  Min R = -31.4
PolicyLoss: 2.22
Policy_Entropy: 0.151
Policy_KL: 0.00444
Policy_SD: 0.555
Steps: 1.16e+04
TotalSt

attitude |   -0.03    0.01   -0.10 |    1.22    0.66    1.93 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.06   -0.05 |    0.07    0.06    0.05
a_f      |    0.01   -0.16 |    0.66    1.92 |   -1.43   -3.11 |    1.37    3.14
w_f      |    0.00    0.00    0.00 |    0.02    0.02    0.01 |   -0.06   -0.06   -0.04 |    0.07    0.05    0.04
w_rewards |   -0.00 |    0.02 |   -0.34 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.26 |    0.25 |    0.00 |    1.53
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -1.00   -0.99 |    1.00    0.99
cs_angles |  0.0009 -0.0001 |  0.0828  0.0848 | -0.9997 -0.9940 |  0.9976  0.9879
optical_flow |  0.0000 -0.0001 |  0.0234  0.0266 | -0.9808 -1.1256 |  1.0432  1.3271
v_err    | -0.0103 |  0.0590 | -0.4533 |  0.1225
landing_rewards |    9.16 |    2.77 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3120   0.1434   0.6161  40.3910  14.8004  11.7810
Update Cnt = 700    ET =   1374.2   Stats:  Mean, Std, Min, Max
r_f      |   -0.82    9.60    8.23 |  173.56  163.98  206.81 | -393.62 -393.48 -394.72 |  389.11  372.38  387.86
v_f      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.11   -0.11 |    0.11    0.10    0.11
r_i      |   11.31   20.23   14.52 |  649.67  635.84  796.50 |-1227.85-1317.40-1304.67 | 1317.58 1232.59 1332.19
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.30 |    0.15 |    0.02 |    1.01
norm_vf  |    0.07 |    0.02 |    0.02 |    0.13
gs_f     |    1.30 |    1.39 |    0.01 |    9.62
thrust   |    0.00    0.00    0.00 |    0.66    0.66    0.66 |   -3.46   -3.32   -3.46 |    3.40    3.42    3.25
norm_thrust |    0.88 |    0.73 |    0.00 |    3.46
fuel     |    1.58 |    0.19 |    1.14 |    2.23
rewards  |  -15.08 |

ADVA:  (19174,) (35211,) 0.544545738547613
ADV1:  0.0004977527808537226 -0.0003724415635806778 0.010299164240179213 0.0846607226279909 -0.1156261194759739
ADVB:  (21224,) (35211,) 0.6027661810229757
ADV2:  0.18533896823982052 0.3780733453029915 0.49258689786399373 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3059   0.1034   0.5408  40.3910  14.8004  11.7810
***** Episode 22018, Mean R = -15.2  Std R = 6.5  Min R = -33.5
PolicyLoss: 2.34
Policy_Entropy: 0.155
Policy_KL: 0.00439
Policy_SD: 0.551
Steps: 1.16e+04
TotalSteps: 7.65e+06
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.954
VF_0_Loss : 0.0313


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0018   5.5684   2.2087   2.1342
ADVA:  (20163,) (35248,) 0.5720324557421698
ADV1:  0.0008113146319705215 5.6113431931603754e-05 0.010659667993437767 0.0846607226279909 -0.1156261194759739
ADVB:  (20636,) (35248,) 0.5854516568315933
ADV2:  0.14536358389872017 0.3600830370771251 0.4945159222674903 3.0

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0007   5.5684   2.2087   2.1342
ADVA:  (18407,) (35230,) 0.5224808401930173
ADV1:  0.0007805330471805615 0.0006566629052282784 0.008901074269691117 0.07849424369640234 -0.09252009914342058
ADVB:  (21370,) (35230,) 0.606585296622197
ADV2:  0.20382718822260143 0.42932404102232263 0.5679902824779322 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.5664   1.9248   7.1766  40.3910  14.8004  11.7810
***** Episode 22266, Mean R = -15.0  Std R = 6.3  Min R = -29.5
PolicyLoss: 2.62
Policy_Entropy: 0.159
Policy_KL: 0.00474
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 7.74e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.0315


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0047   5.5684   2.2087   2.1342
ADVA:  (18830,) (35385,) 0.5321463897131553
ADV1:  0.0 -0.0005238441256596658 0.00949438925024547 0.08844845185208472 -0.10553688432937636
ADVB:  (20076,) (35385,) 

***** Episode 22483, Mean R = -15.0  Std R = 4.9  Min R = -25.4
PolicyLoss: 2.13
Policy_Entropy: 0.159
Policy_KL: 0.00442
Policy_SD: 0.551
Steps: 1.17e+04
TotalSteps: 7.82e+06
VF_0_ExplainedVarNew: 0.962
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.0321


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0005   5.5684   2.2087   2.1342
ADVA:  (21292,) (35174,) 0.6053334849604822
ADV1:  0.0015249707821686671 0.0006351016559590268 0.009782063932345497 0.07496480306156483 -0.09975091604409662
ADVB:  (21650,) (35174,) 0.6155114573264343
ADV2:  0.18849092126568068 0.3653369997386765 0.46668900966163207 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5902   0.2250   1.1100  40.3910  14.8004  11.7810
***** Episode 22514, Mean R = -14.7  Std R = 6.5  Min R = -34.7
PolicyLoss: 2.19
Policy_Entropy: 0.159
Policy_KL: 0.00383
Policy_SD: 0.546
Steps: 1.16e+04
TotalSteps: 7.83e+06
VF_0_ExplainedVarNew: 0.955
VF_0_ExplainedVarOld: 0.953
VF_0_Loss : 0.0248


ValFun  Gradie

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8828   0.4104   1.6098  40.3910  14.8004  11.7810
***** Episode 22731, Mean R = -13.2  Std R = 4.3  Min R = -21.9
PolicyLoss: 2.37
Policy_Entropy: 0.162
Policy_KL: 0.00396
Policy_SD: 0.533
Steps: 1.18e+04
TotalSteps: 7.92e+06
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.0279


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0041   5.5684   2.2087   2.1342
ADVA:  (21034,) (35010,) 0.6007997714938589
ADV1:  0.00042411953531495353 -0.000550752537409753 0.009841663212101824 0.06435823411416428 -0.08715916286294856
ADVB:  (19375,) (35010,) 0.5534133104827192
ADV2:  0.08524344560417192 0.30775326483551974 0.4473221572732108 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2739   0.1157   0.5260  40.3910  14.8004  11.7810
***** Episode 22762, Mean R = -15.9  Std R = 5.5  Min R = -31.5
PolicyLoss: 2.02
Policy_Entropy: 0.161
Policy_KL: 0.00435
Policy_SD: 0.539
Steps: 1.17e+04
TotalSt

w        |    0.00   -0.00   -0.00 |    0.01    0.01    0.00 |   -0.05   -0.05   -0.05 |    0.06    0.05    0.05
a_f      |   -0.05   -0.01 |    0.65    1.84 |   -1.50   -3.13 |    1.46    3.12
w_f      |    0.00    0.00    0.00 |    0.02    0.02    0.01 |   -0.04   -0.05   -0.04 |    0.06    0.05    0.05
w_rewards |   -0.00 |    0.01 |   -0.06 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.25 |    0.00 |    1.49
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -0.98   -0.97 |    0.98    1.00
cs_angles |  0.0025 -0.0005 |  0.0799  0.0835 | -0.9806 -0.9737 |  0.9759  0.9970
optical_flow |  0.0000 -0.0003 |  0.0220  0.0260 | -0.8881 -1.0011 |  1.1198  0.9913
v_err    | -0.0107 |  0.0594 | -0.4531 |  0.1112
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0.03 |    0.02 |   -0.08 |    0.05
tracking_rewards |  -19.59 |    4.84 |  -41.38 |  -11.01
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9874   0.4228   2.0978  40.3910  14.8004  11.7810
Update Cnt = 750    ET =   1627.6   Stats:  Mean, Std, Min, Max
r_f      |   -3.08  -13.48   25.50 |  187.93  169.39  203.96 | -389.76 -373.86 -394.79 |  388.32  367.62  395.61
v_f      |    0.00    0.00   -0.01 |    0.04    0.04    0.05 |   -0.09   -0.12   -0.11 |    0.12    0.09    0.12
r_i      |  -34.41  -40.42   89.89 |  695.25  632.57  773.33 |-1375.66-1268.48-1284.45 | 1312.43 1221.76 1336.73
v_i      |    0.00    0.00   -0.01 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.10    0.09
norm_rf  |    0.27 |    0.13 |    0.02 |    0.79
norm_vf  |    0.07 |    0.02 |    0.03 |    0.14
gs_f     |    1.34 |    2.87 |    0.01 |   45.85
thrust   |   -0.00   -0.00   -0.01 |    0.66    0.66    0.65 |   -3.42   -3.37   -3.38 |    3.46    3.45    3.42
norm_thrust |    0.88 |    0.73 |    0.00 |    3.46
fuel     |    1.55 |    0.18 |    1.19 |    2.35
rewards  |  -14.14 |

ADVA:  (19670,) (35189,) 0.5589814999005371
ADV1:  0.0007749583685151447 -1.5012686703552342e-05 0.009596404110636775 0.08547727194933821 -0.10134226505735017
ADVB:  (20650,) (35189,) 0.5868311119952258
ADV2:  0.15826099443596053 0.3611264160747728 0.49896439749220034 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4853   0.2038   0.9310  40.3910  14.8004  11.7810
***** Episode 23568, Mean R = -14.4  Std R = 6.2  Min R = -35.6
PolicyLoss: 2.23
Policy_Entropy: 0.165
Policy_KL: 0.00427
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 8.23e+06
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.0251


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0017   5.5684   2.2087   2.1342
ADVA:  (17499,) (35190,) 0.49727195225916454
ADV1:  0.0006807099128765346 0.0005996268722061141 0.008254305160385882 0.08547727194933821 -0.09946018139743856
ADVB:  (21561,) (35190,) 0.6127024722932651
ADV2:  0.22441563510467963 0.4433423274937891 0.575380661528276

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0032   0.0020   0.0078   5.5684   2.2087   2.1342
ADVA:  (21106,) (35033,) 0.6024605372077755
ADV1:  0.001419474445680904 0.000412925551499585 0.010202832928104687 0.06775978350767997 -0.06918086845624699
ADVB:  (21290,) (35033,) 0.607712727999315
ADV2:  0.19594188059331263 0.3739301122393711 0.4834836908603833 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9518   0.4805   1.9108  40.3910  14.8004  11.7810
***** Episode 23816, Mean R = -15.4  Std R = 7.0  Min R = -33.1
PolicyLoss: 2.21
Policy_Entropy: 0.168
Policy_KL: 0.00486
Policy_SD: 0.544
Steps: 1.15e+04
TotalSteps: 8.32e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.03


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0027   5.5684   2.2087   2.1342
ADVA:  (20724,) (35086,) 0.5906629424841817
ADV1:  5.992393802871712e-05 -0.0006963962642765551 0.010077347413567224 0.06775978350767997 -0.06918086845624699
ADVB:  (1893

***** Episode 24033, Mean R = -14.3  Std R = 5.5  Min R = -29.6
PolicyLoss: 2.19
Policy_Entropy: 0.167
Policy_KL: 0.00544
Policy_SD: 0.548
Steps: 1.16e+04
TotalSteps: 8.41e+06
VF_0_ExplainedVarNew: 0.949
VF_0_ExplainedVarOld: 0.944
VF_0_Loss : 0.0278


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   5.5684   2.2087   2.1342
ADVA:  (20268,) (35388,) 0.572736520854527
ADV1:  0.00036960816993447665 -0.0005168166229771469 0.009549832066607547 0.06186755863576099 -0.09085263859099191
ADVB:  (19888,) (35388,) 0.5619984175426699
ADV2:  0.10270766672053741 0.3148571234314099 0.4513619318000413 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4569   0.2099   0.9501  40.3910  14.8004  11.7810
***** Episode 24064, Mean R = -14.7  Std R = 4.8  Min R = -26.5
PolicyLoss: 2.01
Policy_Entropy: 0.167
Policy_KL: 0.00445
Policy_SD: 0.546
Steps: 1.18e+04
TotalSteps: 8.42e+06
VF_0_ExplainedVarNew: 0.956
VF_0_ExplainedVarOld: 0.953
VF_0_Loss : 0.0269


ValFun  Gradi

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6226   0.3184   1.3779  40.3910  14.8004  11.7810
***** Episode 24281, Mean R = -13.9  Std R = 6.2  Min R = -30.2
PolicyLoss: 2.3
Policy_Entropy: 0.167
Policy_KL: 0.00483
Policy_SD: 0.546
Steps: 1.18e+04
TotalSteps: 8.5e+06
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.956
VF_0_Loss : 0.0242


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0017   5.5684   2.2087   2.1342
ADVA:  (19580,) (35570,) 0.5504638740511667
ADV1:  0.0007230596668521055 -0.00021181180955841158 0.009940746012214697 0.06315773799480523 -0.10995384723734829
ADVB:  (21858,) (35570,) 0.6145066066910317
ADV2:  0.18824002224483066 0.3654719091184881 0.48066463053383873 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3600   0.1634   0.6597  40.3910  14.8004  11.7810
***** Episode 24312, Mean R = -15.5  Std R = 5.6  Min R = -31.1
PolicyLoss: 2.14
Policy_Entropy: 0.167
Policy_KL: 0.00494
Policy_SD: 0.543
Steps: 1.18e+04
TotalSte

theta_cv |    0.26 |    0.26 |    0.00 |    1.48
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -0.97   -1.00 |    0.95    0.99
cs_angles |  0.0026 -0.0023 |  0.0827  0.0807 | -0.9700 -0.9958 |  0.9547  0.9869
optical_flow |  0.0001 -0.0002 |  0.0223  0.0274 | -1.0889 -1.1055 |  0.8998  1.3101
v_err    | -0.0104 |  0.0588 | -0.4534 |  0.1252
landing_rewards |    9.13 |    2.82 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.06 |    0.05
tracking_rewards |  -19.00 |    4.50 |  -35.70 |  -10.94
steps    |     379 |      20 |     332 |     418
***** Episode 24560, Mean R = -14.1  Std R = 6.2  Min R = -27.9
PolicyLoss: 2.26
Policy_Entropy: 0.166
Policy_KL: 0.00431
Policy_SD: 0.548
Steps: 1.16e+04
TotalSteps: 8.61e+06
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.0328


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0048   0.0028   0.0099   5.5684   2.2087   2.1342
ADVA:  (18723,) (34818,) 0.5377391004652766
ADV1:  0.001726771973006947 0.00

thrust   |   -0.00    0.00   -0.00 |    0.66    0.67    0.68 |   -3.34   -3.24   -3.44 |    3.45    3.43    3.44
norm_thrust |    0.90 |    0.74 |    0.00 |    3.46
fuel     |    1.56 |    0.19 |    1.09 |    2.20
rewards  |  -14.23 |    5.98 |  -42.44 |   -4.16
fuel_rewards |   -4.48 |    0.55 |   -6.32 |   -3.14
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    2.77 |   12.45 |    0.01 |  120.01
norm_af  |    1.82 |    0.92 |    0.04 |    3.35
norm_wf  |    0.02 |    0.01 |    0.00 |    0.06
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |    0.12   -0.02    0.19 |    1.27    0.66    1.90 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00   -0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.03    0.19 |    0.67

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2019   0.0727   0.4084  40.3910  14.8004  11.7810
***** Episode 25149, Mean R = -14.6  Std R = 7.0  Min R = -35.7
PolicyLoss: 2.1
Policy_Entropy: 0.169
Policy_KL: 0.0043
Policy_SD: 0.546
Steps: 1.18e+04
TotalSteps: 8.83e+06
VF_0_ExplainedVarNew: 0.954
VF_0_ExplainedVarOld: 0.947
VF_0_Loss : 0.0202


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0014   5.5684   2.2087   2.1342
ADVA:  (23117,) (35376,) 0.6534656264133876
ADV1:  0.0010859061043596366 -0.0006313146202203845 0.011398362618663895 0.07348735650658961 -0.1144953129391107
ADVB:  (21062,) (35376,) 0.59537539574853
ADV2:  0.13861422896149556 0.3190221700581978 0.4400173844760423 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3522   0.1655   0.7512  40.3910  14.8004  11.7810
Update Cnt = 810    ET =   1571.6   Stats:  Mean, Std, Min, Max
r_f      |    9.11   12.56  -11.87 |  197.06  167.89  194.36 | -390.50 -357.63 -367.05 |  388.39  382.77

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0635   0.5467   2.3739  40.3910  14.8004  11.7810
***** Episode 25397, Mean R = -14.7  Std R = 7.6  Min R = -35.2
PolicyLoss: 2.48
Policy_Entropy: 0.17
Policy_KL: 0.00527
Policy_SD: 0.545
Steps: 1.18e+04
TotalSteps: 8.92e+06
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.0179


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0021   0.0077   5.5684   2.2087   2.1342
ADVA:  (19674,) (35304,) 0.5572739632902787
ADV1:  0.0007930650119130737 0.0007073638486230908 0.009272990026019393 0.07197667858962542 -0.091697114097886
ADVB:  (20650,) (35304,) 0.5849195558576932
ADV2:  0.16816517534301473 0.4206364571184673 0.5551500215567624 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5257   0.2611   1.1247  40.3910  14.8004  11.7810
***** Episode 25428, Mean R = -13.8  Std R = 6.1  Min R = -34.6
PolicyLoss: 2.55
Policy_Entropy: 0.17
Policy_KL: 0.00442
Policy_SD: 0.545
Steps: 1.18e+04
TotalSteps: 8

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5135   0.2239   1.0822  40.3910  14.8004  11.7810
***** Episode 25645, Mean R = -12.9  Std R = 4.9  Min R = -27.8
PolicyLoss: 2.56
Policy_Entropy: 0.17
Policy_KL: 0.00582
Policy_SD: 0.547
Steps: 1.16e+04
TotalSteps: 9.02e+06
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.0204


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0022   0.0087   5.5684   2.2087   2.1342
ADVA:  (19832,) (35106,) 0.5649176778898194
ADV1:  0.0008884220526919244 -0.00010835634010408646 0.009262679202676318 0.07951968859629599 -0.10183518715959508
ADVB:  (21305,) (35106,) 0.6068763174386145
ADV2:  0.18216410762616214 0.3594199848947516 0.47014128992501153 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2603   0.1063   0.5160  40.3910  14.8004  11.7810
***** Episode 25676, Mean R = -15.2  Std R = 6.7  Min R = -31.5
PolicyLoss: 2.11
Policy_Entropy: 0.17
Policy_KL: 0.00493
Policy_SD: 0.552
Steps: 1.18e+04
TotalStep

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2599   0.0908   0.4308  40.3910  14.8004  11.7810
***** Episode 25893, Mean R = -13.2  Std R = 5.3  Min R = -28.6
PolicyLoss: 2.18
Policy_Entropy: 0.171
Policy_KL: 0.00434
Policy_SD: 0.551
Steps: 1.16e+04
TotalSteps: 9.11e+06
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.953
VF_0_Loss : 0.025


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0017   0.0070   5.5684   2.2087   2.1342
ADVA:  (19467,) (34747,) 0.5602498057386249
ADV1:  0.002196837368099204 0.0008168970881158508 0.01074720451945369 0.06924865657262555 -0.1105196668685191
ADVB:  (22709,) (34747,) 0.653552824704291
ADV2:  0.3153562470548779 0.4599228633137864 0.5176479093440323 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8113   0.2968   1.5073  40.3910  14.8004  11.7810
***** Episode 25924, Mean R = -14.6  Std R = 6.5  Min R = -40.0
PolicyLoss: 2.51
Policy_Entropy: 0.171
Policy_KL: 0.00401
Policy_SD: 0.547
Steps: 1.18e+04
TotalSteps: 9.1

ADVA:  (21491,) (35104,) 0.6122094348222424
ADV1:  0.003286116540914404 0.001913075717928445 0.010000080088313329 0.06577631951039978 -0.12636350324425294
ADVB:  (24318,) (35104,) 0.6927415679124886
ADV2:  0.38134976674901633 0.4809249120911176 0.5159754481067302 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6271   0.3137   1.2775  40.3910  14.8004  11.7810
***** Episode 26141, Mean R = -13.4  Std R = 5.0  Min R = -26.2
PolicyLoss: 2.47
Policy_Entropy: 0.172
Policy_KL: 0.0034
Policy_SD: 0.547
Steps: 1.17e+04
TotalSteps: 9.2e+06
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.025


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0049   5.5684   2.2087   2.1342
ADVA:  (19516,) (35243,) 0.5537553556734671
ADV1:  0.0022382111199919074 0.0014164289911270153 0.009005039479255688 0.06176851542639833 -0.12636350324425294
ADVB:  (24215,) (35243,) 0.6870867973782028
ADV2:  0.36871622886139416 0.4885235599532874 0.5338981934017913 3.0 0.0

w        |    0.00    0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02    0.02 |    0.63    1.81 |   -1.43   -3.12 |    1.48    3.10
w_f      |    0.00    0.00    0.00 |    0.02    0.02    0.01 |   -0.04   -0.04   -0.02 |    0.05    0.05    0.03
w_rewards |   -0.00 |    0.00 |   -0.04 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.26 |    0.00 |    1.53
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -1.00   -1.00 |    0.94    0.94
cs_angles |  0.0018 -0.0007 |  0.0763  0.0807 | -0.9989 -0.9980 |  0.9450  0.9446
optical_flow | -0.0001 -0.0001 |  0.0238  0.0235 | -1.1469 -1.0029 |  1.1813  1.1159
v_err    | -0.0103 |  0.0585 | -0.4525 |  0.1128
landing_rewards |    9.13 |    2.82 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.06 |    0.03
tracking_rewards |  -17.86 |    4.01 |  -31.79 |  -10.00
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3035   0.1253   0.5576  40.3910  14.8004  11.7810
Update Cnt = 860    ET =   1361.5   Stats:  Mean, Std, Min, Max
r_f      |   10.32   13.18   -6.91 |  180.64  168.12  199.92 | -374.25 -368.11 -389.09 |  396.44  390.54  385.26
v_f      |   -0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.10   -0.12   -0.13 |    0.12    0.13    0.10
r_i      |   57.60   40.41  -44.14 |  686.35  621.71  784.39 |-1343.51-1262.85-1349.78 | 1356.11 1253.44 1315.40
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.25 |    0.11 |    0.02 |    0.59
norm_vf  |    0.08 |    0.02 |    0.03 |    0.14
gs_f     |    1.59 |    3.32 |    0.01 |   45.83
thrust   |   -0.00   -0.00    0.00 |    0.68    0.67    0.68 |   -3.39   -3.29   -3.41 |    3.24    3.15    3.36
norm_thrust |    0.90 |    0.74 |    0.00 |    3.46
fuel     |    1.58 |    0.20 |    1.18 |    2.31
rewards  |  -13.47 |

ADVA:  (20220,) (34862,) 0.5800011473811026
ADV1:  0.0005853597596401673 -0.00017297406265788837 0.009376530240594134 0.07419342887445726 -0.08872570375138056
ADVB:  (20538,) (34862,) 0.5891228271470369
ADV2:  0.1395431490553089 0.34399487910429516 0.4661072845000824 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4685   0.2017   0.9977  40.3910  14.8004  11.7810
***** Episode 26978, Mean R = -13.0  Std R = 6.3  Min R = -27.9
PolicyLoss: 2.04
Policy_Entropy: 0.174
Policy_KL: 0.0037
Policy_SD: 0.549
Steps: 1.17e+04
TotalSteps: 9.52e+06
VF_0_ExplainedVarNew: 0.954
VF_0_ExplainedVarOld: 0.951
VF_0_Loss : 0.0152


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0015   5.5684   2.2087   2.1342
ADVA:  (20218,) (34933,) 0.5787650645521427
ADV1:  0.0008487259619748711 -0.00012170424920785837 0.009573651339453494 0.07419342887445726 -0.09135797792548339
ADVB:  (21780,) (34933,) 0.623479231672058
ADV2:  0.19155783926835182 0.36922003693619593 0.465815243678844

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0019   5.5684   2.2087   2.1342
ADVA:  (22572,) (35152,) 0.6421256258534365
ADV1:  0.0 -0.0016296372071140466 0.009884979856258618 0.09306962915499095 -0.12388738861121001
ADVB:  (16460,) (35152,) 0.46825216203914427
ADV2:  0.0 0.22312333737822015 0.38716991313170324 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5667   0.1804   0.8997  40.3910  14.8004  11.7810
***** Episode 27226, Mean R = -15.0  Std R = 8.0  Min R = -39.1
PolicyLoss: 1.67
Policy_Entropy: 0.175
Policy_KL: 0.00545
Policy_SD: 0.548
Steps: 1.18e+04
TotalSteps: 9.61e+06
VF_0_ExplainedVarNew: 0.962
VF_0_ExplainedVarOld: 0.959
VF_0_Loss : 0.0237


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0048   5.5684   2.2087   2.1342
ADVA:  (22345,) (35120,) 0.63624715261959
ADV1:  0.00012667469845733213 -0.001231411684101081 0.010007100144533139 0.06734831295682578 -0.12388738861121001
ADVB:  (18246,) (35120,) 0.51953302961

Dynamics: Max Disturbance (m/s^2):  [0.00127453 0.00132576 0.00142314] 0.002325380757340127
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0028   5.5684   2.2087   2.1342
ADVA:  (18956,) (35075,) 0.5404419101924448
ADV1:  0.0006712231615405911 0.00023950699453637015 0.008777287487848815 0.06942946886529366 -0.06329994418505483
ADVB:  (21283,) (35075,) 0.6067854597291518
ADV2:  0.20151095423492912 0.40159760289874535 0.5218078780252646 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3953   0.6918   2.7723  40.3910  14.8004  11.7810
***** Episode 27474, Mean R = -13.2  Std R = 5.3  Min R = -26.1
PolicyLoss: 2.31
Policy_Entropy: 0.176
Policy_KL: 0.00529
Policy_SD: 0.55
Steps: 1.15e+04
TotalSteps: 9.71e+06
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.0155


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0002   0.0009   5.5684   2.2087   2.1342
ADVA:  (19936,) (34999,) 0.5696162747507072
ADV1:  0.000242987325824999

***** Episode 27691, Mean R = -14.2  Std R = 5.8  Min R = -30.3
PolicyLoss: 2.04
Policy_Entropy: 0.177
Policy_KL: 0.00359
Policy_SD: 0.553
Steps: 1.15e+04
TotalSteps: 9.79e+06
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.955
VF_0_Loss : 0.0224


Dynamics: Max Disturbance (m/s^2):  [0.00127453 0.00132576 0.00142314] 0.002325380757340127
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0001   0.0007   5.5684   2.2087   2.1342
ADVA:  (22488,) (34830,) 0.645650301464255
ADV1:  0.0010982303362732562 -4.689328908552255e-05 0.009954381083545662 0.10872942104646477 -0.1491893478049086
ADVB:  (20521,) (34830,) 0.5891759977031295
ADV2:  0.1393361286664692 0.3237669386933071 0.4558707289096825 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6267   0.2360   1.1658  40.3910  14.8004  11.7810
***** Episode 27722, Mean R = -14.2  Std R = 6.4  Min R = -32.4
PolicyLoss: 1.91
Policy_Entropy: 0.177
Policy_KL: 0.00431
Policy_SD: 0.559
Steps: 1.15e+04
TotalSteps: 9.8e+06
VF

w        |    0.00    0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.06    0.05    0.05
a_f      |   -0.00   -0.15 |    0.66    1.84 |   -1.44   -3.14 |    1.44    3.11
w_f      |    0.01    0.00    0.00 |    0.02    0.01    0.01 |   -0.04   -0.04   -0.03 |    0.06    0.04    0.03
w_rewards |   -0.00 |    0.00 |   -0.06 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.25 |    0.00 |    1.51
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -0.97   -0.99 |    0.97    0.97
cs_angles |  0.0021 -0.0006 |  0.0780  0.0808 | -0.9678 -0.9923 |  0.9745  0.9650
optical_flow | -0.0001 -0.0000 |  0.0250  0.0227 | -1.0216 -0.9954 |  1.2966  1.1977
v_err    | -0.0107 |  0.0588 | -0.4530 |  0.1143
landing_rewards |    9.16 |    2.77 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.07 |    0.04
tracking_rewards |  -17.41 |    4.28 |  -36.11 |  -10.19
steps    |     375 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5716   0.2566   1.1597  40.3910  17.1452  11.7810
Update Cnt = 910    ET =   1734.1   Stats:  Mean, Std, Min, Max
r_f      |    9.60   -3.81   -3.82 |  187.85  170.76  200.57 | -381.41 -385.90 -389.10 |  382.60  392.04  373.83
v_f      |   -0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.10    0.09    0.14
r_i      |   19.97   -3.60  -39.44 |  692.65  649.04  758.67 |-1317.15-1295.17-1334.04 | 1312.34 1212.03 1275.63
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.09    0.10
norm_rf  |    0.24 |    0.11 |    0.03 |    0.60
norm_vf  |    0.08 |    0.02 |    0.03 |    0.14
gs_f     |    1.21 |    1.66 |    0.01 |   17.23
thrust   |   -0.00   -0.00   -0.00 |    0.67    0.67    0.68 |   -3.36   -3.45   -3.45 |    3.46    3.28    3.45
norm_thrust |    0.90 |    0.74 |    0.00 |    3.46
fuel     |    1.54 |    0.18 |    1.09 |    2.15
rewards  |  -13.02 |

ADVA:  (21840,) (35376,) 0.6173677069199457
ADV1:  0.0 -0.0007913284506871676 0.009337249311824957 0.06539507188513694 -0.08639329168014764
ADVB:  (16242,) (35376,) 0.4591248303934871
ADV2:  0.0 0.22585794149415894 0.3924118427038491 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4289   0.1770   0.8138  40.3910  17.1452  11.7810
***** Episode 28528, Mean R = -14.5  Std R = 6.0  Min R = -30.2
PolicyLoss: 1.68
Policy_Entropy: 0.18
Policy_KL: 0.0052
Policy_SD: 0.543
Steps: 1.19e+04
TotalSteps: 1.01e+07
VF_0_ExplainedVarNew: 0.952
VF_0_ExplainedVarOld: 0.948
VF_0_Loss : 0.02


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0030   5.5684   2.2087   2.1342
ADVA:  (20124,) (35409,) 0.5683300855714649
ADV1:  0.00015929982262065379 -0.0007013080539041249 0.009592153554747396 0.11313707064047712 -0.08639329168014764
ADVB:  (19327,) (35409,) 0.5458216837527182
ADV2:  0.06456901108253404 0.28653932923408154 0.4383319057072049 3.0 0.0
Policy  Gradients: u/sd/Ma

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0028   5.5684   2.2087   2.1342
ADVA:  (22135,) (35212,) 0.6286209246847666
ADV1:  0.0005600226214143994 -0.00033741166771522537 0.009195373102551531 0.0793100324084745 -0.0785263511293204
ADVB:  (19019,) (35212,) 0.5401283653300011
ADV2:  0.06457715722184515 0.2933410098731223 0.4246795716530774 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4331   0.1480   0.7684  40.3910  17.1452  11.7810
***** Episode 28776, Mean R = -13.5  Std R = 6.3  Min R = -28.9
PolicyLoss: 1.87
Policy_Entropy: 0.181
Policy_KL: 0.00393
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 1.02e+07
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.956
VF_0_Loss : 0.0206


Dynamics: Max Disturbance (m/s^2):  [0.00127453 0.00132576 0.00142314] 0.002325380757340127
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0017   0.0062   5.5684   2.2087   2.1342
ADVA:  (19533,) (35316,) 0.5530920829085967
ADV1:  0.0007681628886235699

***** Episode 28993, Mean R = -12.6  Std R = 5.5  Min R = -24.3
PolicyLoss: 2.06
Policy_Entropy: 0.182
Policy_KL: 0.00424
Policy_SD: 0.532
Steps: 1.19e+04
TotalSteps: 1.03e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.0113


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0009   0.0037   5.5684   2.2087   2.1342
ADVA:  (19111,) (35230,) 0.5424638092534771
ADV1:  0.001256395962049352 0.0005542829717593604 0.008697295050505406 0.08410034193877935 -0.09645729100782047
ADVB:  (22066,) (35230,) 0.626341186488788
ADV2:  0.23528675781047853 0.40779212377701984 0.5272279328765392 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4733   0.1771   0.8432  40.3910  17.1452  11.7810
***** Episode 29024, Mean R = -15.5  Std R = 7.7  Min R = -36.1
PolicyLoss: 2.23
Policy_Entropy: 0.182
Policy_KL: 0.00495
Policy_SD: 0.541
Steps: 1.16e+04
TotalSteps: 1.03e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.0125


Dynamics: Max D

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3567   0.1920   0.6818  40.3910  17.1452  11.7810
***** Episode 29241, Mean R = -14.0  Std R = 6.9  Min R = -31.8
PolicyLoss: 1.98
Policy_Entropy: 0.18
Policy_KL: 0.00342
Policy_SD: 0.551
Steps: 1.18e+04
TotalSteps: 1.04e+07
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.956
VF_0_Loss : 0.0132


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0017   5.5684   2.2087   2.1342
ADVA:  (23013,) (35121,) 0.6552489963269839
ADV1:  0.000983832277605439 -0.0006693172100397623 0.010805664691261305 0.0569753879437932 -0.07418632252032212
ADVB:  (20126,) (35121,) 0.5730474644799408
ADV2:  0.11223889183882776 0.2972160468502768 0.4106658774883451 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4931   0.1664   0.8637  40.3910  17.1452  11.7810
***** Episode 29272, Mean R = -12.6  Std R = 5.6  Min R = -23.9
PolicyLoss: 1.78
Policy_Entropy: 0.18
Policy_KL: 0.00469
Policy_SD: 0.542
Steps: 1.17e+04
TotalSteps: 

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02   -0.11 |    0.65    1.94 |   -1.45   -3.13 |    1.45    3.13
w_f      |    0.00    0.00    0.00 |    0.02    0.02    0.01 |   -0.03   -0.05   -0.03 |    0.05    0.05    0.05
w_rewards |   -0.00 |    0.00 |   -0.04 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.26 |    0.00 |    1.60
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.98 |    1.00    0.99
cs_angles |  0.0007  0.0002 |  0.0797  0.0801 | -0.9927 -0.9761 |  0.9996  0.9861
optical_flow | -0.0003 -0.0001 |  0.0225  0.0229 | -0.9527 -1.1389 |  0.9737  1.2518
v_err    | -0.0104 |  0.0589 | -0.4521 |  0.1162
landing_rewards |    9.16 |    2.77 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.07 |    0.02
tracking_rewards |  -17.53 |    4.64 |  -38.83 |   -9.99
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2866   0.1184   0.5571  40.3910  17.1452  11.7810
Update Cnt = 960    ET =   1429.3   Stats:  Mean, Std, Min, Max
r_f      |   13.51   -5.01  -16.13 |  179.64  168.83  207.65 | -387.78 -384.20 -378.53 |  388.16  376.75  391.19
v_f      |   -0.00   -0.00    0.01 |    0.04    0.05    0.05 |   -0.11   -0.10   -0.11 |    0.11    0.10    0.11
r_i      |   47.69  -10.02  -76.48 |  655.55  660.41  783.49 |-1302.46-1301.55-1339.63 | 1317.50 1293.78 1279.44
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.10    0.10
norm_rf  |    0.22 |    0.09 |    0.04 |    0.56
norm_vf  |    0.08 |    0.02 |    0.04 |    0.14
gs_f     |    1.30 |    2.11 |    0.01 |   28.77
thrust   |    0.00   -0.00    0.00 |    0.65    0.68    0.67 |   -3.05   -3.44   -3.45 |    3.31    3.42    3.41
norm_thrust |    0.89 |    0.75 |    0.00 |    3.46
fuel     |    1.56 |    0.18 |    1.06 |    2.04
rewards  |  -13.09 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3060   0.1225   0.5614  40.3910  17.1452  11.7810
***** Episode 30078, Mean R = -14.0  Std R = 6.9  Min R = -29.7
PolicyLoss: 1.69
Policy_Entropy: 0.183
Policy_KL: 0.00398
Policy_SD: 0.547
Steps: 1.17e+04
TotalSteps: 1.07e+07
VF_0_ExplainedVarNew: 0.958
VF_0_ExplainedVarOld: 0.952
VF_0_Loss : 0.0116


Dynamics: Max Disturbance (m/s^2):  [0.00127453 0.00132576 0.00142314] 0.002325380757340127
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0021   5.5684   2.2087   2.1342
ADVA:  (20889,) (35075,) 0.595552387740556
ADV1:  0.00031369267217905977 -0.0008875270448371121 0.00938576337112238 0.05412641732653084 -0.10300765681255591
ADVB:  (19407,) (35075,) 0.5533000712758375
ADV2:  0.0812907320137554 0.28174445238035534 0.40498613511236214 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4216   0.1546   0.7517  40.3910  17.1452  11.7810
***** Episode 30109, Mean R = -12.9  Std R = 5.7  Min R = -31.1
PolicyL

ADVA:  (19032,) (35134,) 0.5416975009961861
ADV1:  7.924376092392337e-05 -0.00023399034021505483 0.0067965027494275715 0.0440776873521454 -0.06444909224201253
ADVB:  (19875,) (35134,) 0.5656913531052542
ADV2:  0.12557793413927903 0.35134990624421103 0.48918415787330277 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7229   0.3555   1.2951  40.3910  17.1452  11.7810
***** Episode 30326, Mean R = -10.4  Std R = 3.3  Min R = -17.8
PolicyLoss: 2.11
Policy_Entropy: 0.184
Policy_KL: 0.00387
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 1.08e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.0178


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0004   5.5684   2.2087   2.1342
ADVA:  (20853,) (35199,) 0.5924316031705447
ADV1:  0.0006180134968259716 -0.00029819176164811465 0.008465650378667616 0.06199156398554767 -0.07043331103945025
ADVB:  (21129,) (35199,) 0.6002727350208813
ADV2:  0.16627819510007227 0.33870544021046317 0.44269766297

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0018   5.5684   2.2087   2.1342
ADVA:  (20168,) (34803,) 0.5794902738269689
ADV1:  0.00016243598474107498 -0.0001832250161097581 0.008358160070375698 0.06407221838112531 -0.09789745835572325
ADVB:  (18542,) (34803,) 0.5327701634916531
ADV2:  0.05522105075210319 0.3023427647462368 0.4615408439370737 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7798   0.3464   1.5314  40.3910  17.1452  11.7810
***** Episode 30574, Mean R = -12.6  Std R = 5.1  Min R = -25.9
PolicyLoss: 1.92
Policy_Entropy: 0.183
Policy_KL: 0.00606
Policy_SD: 0.546
Steps: 1.18e+04
TotalSteps: 1.09e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.0165


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0003   0.0010   5.5684   2.2087   2.1342
ADVA:  (19059,) (34952,) 0.545290684367132
ADV1:  0.000517395465359824 2.6585585260280784e-05 0.008531714283418167 0.06407221838112531 -0.06793916795445891
ADVB: 

***** Episode 30791, Mean R = -11.7  Std R = 4.1  Min R = -21.0
PolicyLoss: 1.97
Policy_Entropy: 0.185
Policy_KL: 0.004
Policy_SD: 0.533
Steps: 1.17e+04
TotalSteps: 1.1e+07
VF_0_ExplainedVarNew: 0.958
VF_0_ExplainedVarOld: 0.951
VF_0_Loss : 0.00941


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0024   5.5684   2.2087   2.1342
ADVA:  (19975,) (34859,) 0.5730227487879744
ADV1:  0.000824401954456112 -0.00010520891649830964 0.008819128741597862 0.04577514798789972 -0.06928196395168457
ADVB:  (21030,) (34859,) 0.6032875297627586
ADV2:  0.18553637960629163 0.3659184730935472 0.4689966261153578 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8140   0.3888   1.6837  40.3910  17.1452  11.7810
***** Episode 30822, Mean R = -10.7  Std R = 4.8  Min R = -23.7
PolicyLoss: 2.06
Policy_Entropy: 0.185
Policy_KL: 0.00441
Policy_SD: 0.539
Steps: 1.15e+04
TotalSteps: 1.1e+07
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.0154


ValFun  Gradients

seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.97   -0.93 |    0.96    0.98
cs_angles |  0.0033  0.0031 |  0.0745  0.0772 | -0.9674 -0.9334 |  0.9602  0.9815
optical_flow | -0.0000 -0.0000 |  0.0208  0.0217 | -0.8783 -1.1839 |  1.0618  1.1258
v_err    | -0.0103 |  0.0588 | -0.4525 |  0.1138
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0.03 |    0.02 |   -0.07 |    0.02
tracking_rewards |  -16.51 |    4.30 |  -35.16 |   -9.78
steps    |     377 |      19 |     337 |     418
***** Episode 31070, Mean R = -12.3  Std R = 6.9  Min R = -31.1
PolicyLoss: 2.2
Policy_Entropy: 0.184
Policy_KL: 0.00528
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 1.11e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.0111


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0010   0.0038   5.5684   2.2087   2.1342
ADVA:  (19336,) (34783,) 0.5559037460828565
ADV1:  5.223851795338984e-05 -0.00028531218142017594 0.007605437806434456 0.060531

attitude |   -0.13   -0.00   -0.04 |    1.15    0.65    1.86 |   -3.14   -1.56   -3.14 |    3.14    1.56    3.14
w        |    0.00    0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.06    0.05
a_f      |   -0.00   -0.05 |    0.66    1.88 |   -1.49   -3.13 |    1.48    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.02    0.01 |   -0.03   -0.05   -0.03 |    0.04    0.06    0.03
w_rewards |   -0.00 |    0.01 |   -0.09 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.25 |    0.00 |    1.39
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -1.00 |    0.96    0.99
cs_angles |  0.0017  0.0020 |  0.0755  0.0770 | -0.9927 -0.9999 |  0.9621  0.9909
optical_flow | -0.0001 -0.0001 |  0.0217  0.0227 | -1.0621 -1.1284 |  1.1620  1.1932
v_err    | -0.0106 |  0.0595 | -0.4531 |  0.1089
landing_rewards |    9.29 |    2.57 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8714   0.4597   1.6634  40.3910  17.1452  11.8547
Update Cnt = 1020    ET =   1389.2   Stats:  Mean, Std, Min, Max
r_f      |    2.60    9.72    3.14 |  184.55  166.80  203.48 | -384.27 -391.19 -364.58 |  373.61  382.68  384.00
v_f      |   -0.00   -0.01   -0.00 |    0.04    0.04    0.05 |   -0.11   -0.10   -0.10 |    0.11    0.09    0.10
r_i      |   18.75   75.26   16.10 |  684.84  615.36  785.46 |-1341.67-1258.98-1265.07 | 1285.37 1238.92 1328.67
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.09    0.10    0.10
norm_rf  |    0.20 |    0.10 |    0.04 |    0.52
norm_vf  |    0.08 |    0.02 |    0.03 |    0.12
gs_f     |    1.32 |    1.78 |    0.01 |   15.92
thrust   |    0.01   -0.00    0.00 |    0.67    0.68    0.66 |   -3.41   -3.42   -3.41 |    3.43    3.31    3.43
norm_thrust |    0.89 |    0.74 |    0.00 |    3.46
fuel     |    1.54 |    0.18 |    1.16 |    2.04
rewards  |  -12.10 

ADVA:  (19085,) (35097,) 0.5437786705416418
ADV1:  0.000458779736668748 -0.00016012727765882237 0.008359100365552668 0.051341688973521316 -0.08498682172013322
ADVB:  (20558,) (35097,) 0.5857480696355813
ADV2:  0.1775917603590533 0.3928192968231226 0.5121640627578419 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6131   0.2555   1.1057  40.3910  17.1452  11.8547
***** Episode 31938, Mean R = -11.0  Std R = 4.2  Min R = -22.6
PolicyLoss: 2.26
Policy_Entropy: 0.187
Policy_KL: 0.00445
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps: 1.14e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.0118


Dynamics: Max Disturbance (m/s^2):  [0.00127453 0.00132576 0.00142314] 0.002325380757340127
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0010   0.0038   5.5684   2.2087   2.1342
ADVA:  (22424,) (35351,) 0.634324347260332
ADV1:  0.0 -0.0007494775879832151 0.008582847603356583 0.051341688973521316 -0.07302478666315337
ADVB:  (17470,) (35351,) 0.49418686

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0049   0.0029   0.0113   5.5684   2.2087   2.1342
ADVA:  (22369,) (35218,) 0.6351581577602362
ADV1:  0.0023335990518197555 0.00035871249635330174 0.011135600938349179 0.06897549536876896 -0.06871271294641207
ADVB:  (22981,) (35218,) 0.6525356351865523
ADV2:  0.27112484464003905 0.4089177710477245 0.46924447613622505 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3122   0.6010   2.8452  40.3910  17.1452  11.8547
***** Episode 32186, Mean R = -12.6  Std R = 5.8  Min R = -28.4
PolicyLoss: 2.12
Policy_Entropy: 0.187
Policy_KL: 0.00401
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 1.15e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.0204


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0014   0.0059   5.5684   2.2087   2.1342
ADVA:  (20558,) (35354,) 0.5814900718447701
ADV1:  0.0026852028219136338 0.0017732201812961361 0.009379999018379461 0.0700240701378807 -0.06871271294641207
ADVB: 

***** Episode 32403, Mean R = -10.5  Std R = 4.8  Min R = -27.3
PolicyLoss: 2.16
Policy_Entropy: 0.188
Policy_KL: 0.00377
Policy_SD: 0.533
Steps: 1.15e+04
TotalSteps: 1.16e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.0128


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0036   5.5684   2.2087   2.1342
ADVA:  (20585,) (34601,) 0.5949250021675674
ADV1:  0.0018125529154798033 0.0005345042560363779 0.009553464350434275 0.06608399075944277 -0.07903948129740085
ADVB:  (22908,) (34601,) 0.6620617901216728
ADV2:  0.27275979323666344 0.4058463468440063 0.47414718481019946 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1895   0.0758   0.3941  40.3910  17.1452  11.8547
***** Episode 32434, Mean R = -12.3  Std R = 5.3  Min R = -27.0
PolicyLoss: 2.06
Policy_Entropy: 0.188
Policy_KL: 0.00352
Policy_SD: 0.54
Steps: 1.15e+04
TotalSteps: 1.16e+07
VF_0_ExplainedVarNew: 0.957
VF_0_ExplainedVarOld: 0.951
VF_0_Loss : 0.0137


ValFun  Gradie

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6168   0.2733   1.1821  40.3910  17.1452  11.8547
***** Episode 32651, Mean R = -10.5  Std R = 3.9  Min R = -19.1
PolicyLoss: 2.33
Policy_Entropy: 0.188
Policy_KL: 0.00495
Policy_SD: 0.54
Steps: 1.16e+04
TotalSteps: 1.17e+07
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.0075


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0039   0.0023   0.0090   5.5684   2.2087   2.1342
ADVA:  (19904,) (35133,) 0.5665328893063502
ADV1:  0.0 -0.0008436489800988142 0.009670443374549 0.06610771930250275 -0.08877641646056361
ADVB:  (19178,) (35133,) 0.545868556627672
ADV2:  0.07380208454394802 0.3281719001163614 0.4814318567503005 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7022   0.2959   1.4059  40.3910  17.1452  11.8547
***** Episode 32682, Mean R = -13.0  Std R = 5.3  Min R = -25.5
PolicyLoss: 2.02
Policy_Entropy: 0.188
Policy_KL: 0.00454
Policy_SD: 0.538
Steps: 1.19e+04
TotalSteps: 1.17e+07
VF_0_Expla

seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -0.99   -0.98 |    0.98    0.93
cs_angles |  0.0042 -0.0011 |  0.0752  0.0772 | -0.9866 -0.9769 |  0.9808  0.9273
optical_flow | -0.0000 -0.0001 |  0.0209  0.0219 | -1.0840 -1.0427 |  1.4423  1.0978
v_err    | -0.0106 |  0.0588 | -0.4534 |  0.1049
landing_rewards |    9.10 |    2.87 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.03
tracking_rewards |  -16.44 |    4.03 |  -34.55 |   -9.39
steps    |     378 |      19 |     335 |     416
***** Episode 32930, Mean R = -11.5  Std R = 6.0  Min R = -27.7
PolicyLoss: 1.82
Policy_Entropy: 0.188
Policy_KL: 0.00468
Policy_SD: 0.535
Steps: 1.17e+04
TotalSteps: 1.18e+07
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.961
VF_0_Loss : 0.0115


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0030   5.5684   2.2087   2.1342
ADVA:  (21686,) (35089,) 0.6180284419618683
ADV1:  0.0010034422882979916 0.00012111388564376158 0.009034908471069391 0.066603

glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    2.89 |   13.14 |    0.01 |  580.13
norm_af  |    1.78 |    0.87 |    0.10 |    3.22
norm_wf  |    0.02 |    0.01 |    0.00 |    0.05
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |   -0.03    0.02    0.00 |    1.22    0.65    1.87 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |   -0.00    0.00   -0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02    0.07 |    0.65    1.87 |   -1.46   -3.12 |    1.50    3.14
w_f      |   -0.00    0.00    0.00 |    0.02    0.01    0.01 |   -0.04   -0.04   -0.02 |    0.04    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.26 |    0.00 |    1.36
seeker_angles | 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0016   0.0057   5.5684   2.2087   2.1342
ADVA:  (18949,) (35082,) 0.5401345419303346
ADV1:  0.0019031803475765795 0.0010616841364852964 0.008190790362883754 0.07379327427101573 -0.062498672265079436
ADVB:  (24845,) (35082,) 0.7081979362636109
ADV2:  0.3540109076851121 0.47191057803834896 0.5090466358311698 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5879   0.2826   1.2402  40.3910  17.1452  11.8547
Update Cnt = 1080    ET =   1380.1   Stats:  Mean, Std, Min, Max
r_f      |   -0.06   -9.09   -4.86 |  177.28  155.36  216.77 | -400.04 -376.07 -379.16 |  391.44  373.10  380.33
v_f      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.11   -0.11   -0.09 |    0.10    0.11    0.11
r_i      |   -0.18  -23.19  -17.59 |  649.00  640.27  801.97 |-1303.85-1345.20-1365.51 | 1291.92 1321.57 1283.63
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.10    0.09
no

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0027   5.5684   2.2087   2.1342
ADVA:  (18951,) (34777,) 0.544929119820571
ADV1:  0.0 -0.0007247898987176762 0.008491295020150206 0.05305573022970764 -0.09545889328942464
ADVB:  (19851,) (34777,) 0.57080829283722
ADV2:  0.12497914652403527 0.3309581960600779 0.45806619016399786 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3489   0.1587   0.7475  40.3910  17.1452  11.8547
***** Episode 33798, Mean R = -12.1  Std R = 6.1  Min R = -26.6
PolicyLoss: 1.95
Policy_Entropy: 0.188
Policy_KL: 0.00391
Policy_SD: 0.547
Steps: 1.15e+04
TotalSteps: 1.21e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.00358


Dynamics: Max Disturbance (m/s^2):  [0.00127453 0.00132576 0.00142314] 0.002325380757340127
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0013   0.0051   5.5684   2.2087   2.1342
ADVA:  (19390,) (34798,) 0.5572159319501121
ADV1:  0.001336521956028451 0.0007583055887497

***** Episode 34015, Mean R = -12.4  Std R = 4.4  Min R = -21.6
PolicyLoss: 1.95
Policy_Entropy: 0.189
Policy_KL: 0.00707
Policy_SD: 0.541
Steps: 1.16e+04
TotalSteps: 1.22e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.00604


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0020   5.5684   2.2087   2.1342
ADVA:  (21087,) (35053,) 0.6015747582232619
ADV1:  0.00034279727869276563 -0.000285669349546157 0.007193006832836556 0.04543439179029912 -0.05361804896321885
ADVB:  (18888,) (35053,) 0.5388411833509258
ADV2:  0.06863645155166816 0.3081404919385172 0.45078422249047784 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7412   0.2698   1.2815  40.3910  17.1452  11.8547
***** Episode 34046, Mean R = -11.5  Std R = 4.4  Min R = -21.7
PolicyLoss: 1.9
Policy_Entropy: 0.19
Policy_KL: 0.00505
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 1.22e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00734


ValFun  Grad

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9255   0.4748   2.0144  40.3910  17.1452  11.8547
***** Episode 34263, Mean R = -11.6  Std R = 5.6  Min R = -26.8
PolicyLoss: 2.53
Policy_Entropy: 0.19
Policy_KL: 0.00552
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 1.23e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.00864


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0039   0.0025   0.0090   5.5684   2.2087   2.1342
ADVA:  (20106,) (35420,) 0.5676453980801807
ADV1:  0.0 -0.0006639429336610917 0.008298000046928732 0.09744478486190883 -0.09166453807778674
ADVB:  (18320,) (35420,) 0.5172219085262564
ADV2:  0.029238837073805334 0.32462052016729226 0.49122163868085195 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4225   0.1726   0.8013  40.3910  17.1452  11.8547
***** Episode 34294, Mean R = -11.4  Std R = 4.4  Min R = -25.5
PolicyLoss: 2.1
Policy_Entropy: 0.19
Policy_KL: 0.00489
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 1.23e+07
VF_0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2371   0.6375   2.4758  40.3910  17.1452  11.8547
***** Episode 34511, Mean R = -12.5  Std R = 5.0  Min R = -23.9
PolicyLoss: 2.12
Policy_Entropy: 0.19
Policy_KL: 0.0074
Policy_SD: 0.551
Steps: 1.16e+04
TotalSteps: 1.24e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.007


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0016   0.0062   5.5684   2.2087   2.1342
ADVA:  (20225,) (34973,) 0.5783032625167986
ADV1:  0.00043808650380652847 -3.010402096845973e-05 0.008463317298326504 0.08456217344160383 -0.08460666127847088
ADVB:  (19700,) (34973,) 0.5632916821548052
ADV2:  0.12054359406283112 0.3565782640531708 0.5028782194403336 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6132   0.2514   1.2129  40.3910  17.1452  11.8547
***** Episode 34542, Mean R = -10.7  Std R = 6.2  Min R = -34.9
PolicyLoss: 2.1
Policy_Entropy: 0.19
Policy_KL: 0.00414
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 

attitude |    0.13   -0.08   -0.21 |    1.14    0.64    1.91 |   -3.13   -1.55   -3.14 |    3.14    1.56    3.14
w        |   -0.00   -0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.07   -0.25 |    0.65    1.90 |   -1.52   -3.14 |    1.56    3.10
w_f      |   -0.00   -0.00    0.00 |    0.02    0.01    0.01 |   -0.04   -0.04   -0.02 |    0.04    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.26 |    0.00 |    1.49
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.98 |    1.00    0.99
cs_angles |  0.0031  0.0005 |  0.0765  0.0807 | -0.9802 -0.9782 |  0.9967  0.9861
optical_flow | -0.0000  0.0000 |  0.0213  0.0217 | -1.0180 -0.9638 |  1.1417  1.1731
v_err    | -0.0108 |  0.0587 | -0.4527 |  0.1081
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6126   0.3002   1.1867  40.3910  17.1452  11.8547
Update Cnt = 1130    ET =   1340.8   Stats:  Mean, Std, Min, Max
r_f      |  -18.42   -1.12   -6.72 |  186.14  167.42  200.95 | -389.14 -384.32 -380.35 |  390.64  365.24  393.07
v_f      |    0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.10    0.10
r_i      |  -15.92  -13.95  -14.71 |  716.90  632.63  753.14 |-1340.19-1336.90-1321.80 | 1309.94 1250.13 1266.72
v_i      |    0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.08   -0.09 |    0.10    0.10    0.10
norm_rf  |    0.17 |    0.08 |    0.02 |    0.45
norm_vf  |    0.08 |    0.02 |    0.03 |    0.12
gs_f     |    1.17 |    1.67 |    0.01 |   14.75
thrust   |    0.00    0.00    0.00 |    0.66    0.69    0.66 |   -3.46   -3.45   -3.46 |    3.41    3.45    3.43
norm_thrust |    0.89 |    0.75 |    0.00 |    3.46
fuel     |    1.57 |    0.20 |    1.12 |    2.44
rewards  |  -11.45 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  13.4103   9.8320  35.2560  40.3910  17.1452  11.8547
***** Episode 35348, Mean R = -10.1  Std R = 3.7  Min R = -17.7
PolicyLoss: 2.49
Policy_Entropy: 0.192
Policy_KL: 0.00904
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps: 1.27e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.00798


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0014   0.0055   5.5684   2.2087   2.1342
ADVA:  (20472,) (35122,) 0.5828825237742725
ADV1:  0.00035319814219467865 -0.00019162266170697588 0.008012865198621752 0.056613095831261645 -0.11416755031211662
ADVB:  (19549,) (35122,) 0.5566026991629178
ADV2:  0.09202879664244781 0.32589093782404127 0.46301466527765045 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.3084   1.5850   8.0822  40.3910  17.1452  11.8547
***** Episode 35379, Mean R = -9.6  Std R = 3.7  Min R = -17.4
PolicyLoss: 1.93
Policy_Entropy: 0.193
Policy_KL: 0.00636
Policy_SD: 0.535
Steps: 1.16e+04
Tot

ADVA:  (22299,) (35193,) 0.6336203222231693
ADV1:  0.0007252943196505036 -0.00029230069784264413 0.008524957637878115 0.06941830445476266 -0.06942042720699254
ADVB:  (20682,) (35193,) 0.5876736851078339
ADV2:  0.12049057457737829 0.3006311794424332 0.4206598209528346 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5047   0.1647   0.8666  40.3910  17.1452  11.8547
***** Episode 35596, Mean R = -12.3  Std R = 5.4  Min R = -23.4
PolicyLoss: 1.69
Policy_Entropy: 0.193
Policy_KL: 0.00447
Policy_SD: 0.54
Steps: 1.19e+04
TotalSteps: 1.28e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.0089


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0005   5.5684   2.2087   2.1342
ADVA:  (22319,) (35367,) 0.6310685101931178
ADV1:  0.0012714638639612077 7.466764489644223e-05 0.008860257520804425 0.047341051918723065 -0.06942042720699254
ADVB:  (21880,) (35367,) 0.6186558090875675
ADV2:  0.1793439018530404 0.33048140750037047 0.4158494100130414

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0012   5.5684   2.2087   2.1342
ADVA:  (20130,) (35188,) 0.5720700238717744
ADV1:  0.0 -0.0005146478695848414 0.007226367229429139 0.07899601964676023 -0.10945155359374065
ADVB:  (17057,) (35188,) 0.4847391156075935
ADV2:  0.0 0.29022072034631463 0.48014828884258715 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2621   0.0935   0.4773  40.3910  17.1452  11.8547
***** Episode 35844, Mean R = -11.9  Std R = 4.9  Min R = -23.0
PolicyLoss: 1.98
Policy_Entropy: 0.194
Policy_KL: 0.00522
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 1.29e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.00928


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0020   0.0072   5.5684   2.2087   2.1342
ADVA:  (21738,) (35346,) 0.6150059412663385
ADV1:  0.0 -0.001157658100352819 0.008154653900508336 0.07899601964676023 -0.08449368969834836
ADVB:  (15610,) (35346,) 0.44163413116052735
ADV2:  0.0 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0012   0.0041   5.5684   2.2087   2.1342
ADVA:  (20523,) (35113,) 0.584484379004927
ADV1:  0.0011017439226049964 0.0007505533660208499 0.007536352467176626 0.05677008544051959 -0.07159231881741723
ADVB:  (20433,) (35113,) 0.5819212257568422
ADV2:  0.17711332558390475 0.4158435519507772 0.534117499460259 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7600   0.9482   3.6631  40.3910  17.1452  11.8547
***** Episode 36092, Mean R = -11.5  Std R = 5.9  Min R = -28.2
PolicyLoss: 2.38
Policy_Entropy: 0.192
Policy_KL: 0.00475
Policy_SD: 0.548
Steps: 1.17e+04
TotalSteps: 1.3e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.00386


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0016   0.0057   5.5684   2.2087   2.1342
ADVA:  (22038,) (35245,) 0.6252801815860406
ADV1:  0.0 -0.0007534592087251597 0.008256845413291873 0.05312369750114582 -0.08222301254748327
ADVB:  (16860,) (35245,) 0

seeker_angles |    0.00    0.00 |    0.08    0.08 |   -1.00   -0.99 |    0.96    0.99
cs_angles |  0.0023  0.0020 |  0.0806  0.0793 | -0.9952 -0.9938 |  0.9557  0.9896
optical_flow | -0.0001 -0.0000 |  0.0197  0.0224 | -0.9493 -1.2961 |  1.2107  1.1034
v_err    | -0.0106 |  0.0584 | -0.4525 |  0.0999
landing_rewards |    9.61 |    1.93 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.07 |    0.02
tracking_rewards |  -16.80 |    4.58 |  -34.09 |   -8.90
steps    |     377 |      19 |     334 |     420
***** Episode 36340, Mean R = -10.5  Std R = 3.9  Min R = -20.9
PolicyLoss: 3.07
Policy_Entropy: 0.193
Policy_KL: 0.0389
Policy_SD: 0.542
Steps: 1.16e+04
TotalSteps: 1.31e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00879


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0020   5.5684   2.2087   2.1342
ADVA:  (19204,) (34897,) 0.55030518382669
ADV1:  -0.0004156340519624307 -0.0008937130913012934 0.006220060897470144 0.0554393

w        |   -0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.00   -0.04 |    0.69    1.89 |   -1.52   -3.14 |    1.49    3.10
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.03   -0.02 |    0.04    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.25 |    0.00 |    1.52
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.99 |    0.99    1.00
cs_angles |  0.0008  0.0017 |  0.0761  0.0763 | -0.9922 -0.9905 |  0.9862  0.9987
optical_flow |  0.0002 -0.0001 |  0.0220  0.0226 | -0.9186 -1.0581 |  1.3307  1.2981
v_err    | -0.0107 |  0.0586 | -0.4617 |  0.0997
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0.03 |    0.02 |   -0.09 |    0.01
tracking_rewards |  -16.18 |    4.16 |  -39.93 |   -8.56
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5754   0.2254   1.0320  62.2550  28.0235  17.1554
Update Cnt = 1190    ET =   1242.0   Stats:  Mean, Std, Min, Max
r_f      |  -12.99   -4.49   -0.96 |  195.35  172.82  195.22 | -395.12 -385.99 -390.25 |  380.86  389.45  380.77
v_f      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
r_i      |  -19.23  -31.11   27.84 |  696.89  652.63  751.26 |-1313.14-1322.89-1369.42 | 1322.87 1244.56 1345.35
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.10    0.10
norm_rf  |    0.17 |    0.08 |    0.03 |    0.54
norm_vf  |    0.07 |    0.01 |    0.02 |    0.11
gs_f     |    1.60 |    5.17 |    0.01 |   85.55
thrust   |    0.00    0.00    0.00 |    0.66    0.68    0.67 |   -3.43   -3.42   -3.42 |    3.40    3.43    3.46
norm_thrust |    0.89 |    0.75 |    0.00 |    3.46
fuel     |    1.53 |    0.18 |    1.13 |    2.07
rewards  |  -11.35 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6496   0.3238   1.4000  62.2550  28.0235  17.1554
***** Episode 37208, Mean R = -11.4  Std R = 6.0  Min R = -26.1
PolicyLoss: 2.03
Policy_Entropy: 0.196
Policy_KL: 0.00624
Policy_SD: 0.532
Steps: 1.17e+04
TotalSteps: 1.34e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.00418


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0011   5.5684   2.2087   2.1342
ADVA:  (21075,) (35220,) 0.598381601362862
ADV1:  0.0007612269139391668 0.00022956193499494335 0.007353609561766935 0.06508099729500089 -0.12446320913106135
ADVB:  (20272,) (35220,) 0.5755820556501987
ADV2:  0.13744918626295202 0.34993334120108505 0.47408281468681324 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6099   0.3081   1.2119  62.2550  28.0235  17.1554
***** Episode 37239, Mean R = -10.4  Std R = 5.0  Min R = -27.0
PolicyLoss: 1.99
Policy_Entropy: 0.196
Policy_KL: 0.0047
Policy_SD: 0.532
Steps: 1.16e+04
TotalSt

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7482   0.2642   1.2285  62.2550  28.0235  17.1554
***** Episode 37456, Mean R = -11.5  Std R = 4.6  Min R = -22.5
PolicyLoss: 1.76
Policy_Entropy: 0.196
Policy_KL: 0.0044
Policy_SD: 0.538
Steps: 1.16e+04
TotalSteps: 1.35e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.00247


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0015   0.0058   5.5684   2.2087   2.1342
ADVA:  (20854,) (35383,) 0.5893790803493203
ADV1:  0.0 -0.0010120425385757403 0.007445729748915687 0.05634201460544602 -0.09094336407001014
ADVB:  (16743,) (35383,) 0.47319334143515246
ADV2:  0.0 0.25625647741415897 0.4177232639501682 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4651   0.1813   0.7485  62.2550  28.0235  17.1554
***** Episode 37487, Mean R = -12.1  Std R = 5.8  Min R = -31.5
PolicyLoss: 1.76
Policy_Entropy: 0.196
Policy_KL: 0.00605
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 1.35e+07
VF_0_ExplainedVarNe

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7166   0.3485   1.3930  62.2550  28.0235  17.1554
***** Episode 37704, Mean R = -12.6  Std R = 5.7  Min R = -27.7
PolicyLoss: 2.25
Policy_Entropy: 0.198
Policy_KL: 0.00519
Policy_SD: 0.526
Steps: 1.19e+04
TotalSteps: 1.36e+07
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 0.00246


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0010   0.0035   5.5684   2.2087   2.1342
ADVA:  (22573,) (34937,) 0.6461058476686607
ADV1:  0.0021649563161835602 0.0011962283547269916 0.009618732913459495 0.09188020892649895 -0.09677631065099684
ADVB:  (23036,) (34937,) 0.6593582734636632
ADV2:  0.2513164378082724 0.4109976761429889 0.5043055954174279 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6128   0.2310   1.2329  62.2550  28.0235  17.1554
***** Episode 37735, Mean R = -10.9  Std R = 4.5  Min R = -18.8
PolicyLoss: 2.05
Policy_Entropy: 0.198
Policy_KL: 0.00495
Policy_SD: 0.531
Steps: 1.15e+04
TotalStep

ADVA:  (19893,) (35341,) 0.5628872980391048
ADV1:  0.0012309933682578544 0.00012145005868092797 0.008896126049883664 0.09878148861882807 -0.0644196819903215
ADVB:  (22045,) (35341,) 0.6237797459041906
ADV2:  0.23374963987808728 0.39235458651434346 0.47490290871537366 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9018   0.4226   1.6655  62.2550  28.0235  17.1554
***** Episode 37952, Mean R = -11.0  Std R = 4.5  Min R = -19.9
PolicyLoss: 2.06
Policy_Entropy: 0.198
Policy_KL: 0.00412
Policy_SD: 0.53
Steps: 1.18e+04
TotalSteps: 1.37e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00349


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0030   5.5684   2.2087   2.1342
ADVA:  (22407,) (35202,) 0.6365263337310381
ADV1:  0.0011454421504957816 -0.0002826446341634072 0.009204677703144133 0.06440297633290898 -0.0644196819903215
ADVB:  (20125,) (35202,) 0.5717004715641156
ADV2:  0.12671354716212824 0.3106316885651827 0.4128770928317811

theta_cv |    0.23 |    0.26 |    0.00 |    1.43
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.97   -0.97 |    0.99    0.99
cs_angles |  0.0021  0.0013 |  0.0738  0.0796 | -0.9748 -0.9677 |  0.9948  0.9882
optical_flow | -0.0001 -0.0000 |  0.0198  0.0223 | -1.2036 -1.2246 |  0.9647  1.3547
v_err    | -0.0109 |  0.0590 | -0.4567 |  0.0982
landing_rewards |    9.26 |    2.62 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.07 |    0.02
tracking_rewards |  -16.31 |    4.59 |  -53.43 |   -8.23
steps    |     379 |      20 |     330 |     417
***** Episode 38200, Mean R = -12.2  Std R = 5.5  Min R = -22.7
PolicyLoss: 2.36
Policy_Entropy: 0.197
Policy_KL: 0.00474
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 1.38e+07
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.00164


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0010   5.5684   2.2087   2.1342
ADVA:  (20634,) (35327,) 0.5840858267047867
ADV1:  0.0 -0.000594731980630289

w        |   -0.00   -0.00   -0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.02    0.05 |    0.67    1.82 |   -1.51   -3.13 |    1.50    3.14
w_f      |   -0.00   -0.00    0.00 |    0.02    0.01    0.01 |   -0.04   -0.04   -0.02 |    0.04    0.04    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.26 |    0.00 |    1.56
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -1.00   -0.98 |    0.98    0.99
cs_angles |  0.0013 -0.0005 |  0.0761  0.0789 | -0.9977 -0.9795 |  0.9845  0.9943
optical_flow |  0.0001 -0.0001 |  0.0200  0.0211 | -1.0096 -1.0103 |  1.1399  1.3331
v_err    | -0.0109 |  0.0594 | -0.4516 |  0.1130
landing_rewards |    9.55 |    2.08 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -16.51 |    4.62 |  -34.45 |   -9.01
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5890   0.3067   1.3290  62.2550  28.0235  17.1554
Update Cnt = 1250    ET =   1359.0   Stats:  Mean, Std, Min, Max
r_f      |    3.86   -8.67   -3.29 |  181.06  170.91  202.07 | -379.00 -377.10 -379.65 |  392.14  382.66  369.20
v_f      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.11   -0.10   -0.10 |    0.10    0.08    0.09
r_i      |   37.28  -27.63  -38.84 |  680.41  659.64  753.52 |-1286.05-1349.37-1322.25 | 1322.45 1357.03 1254.28
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.17 |    0.08 |    0.03 |    0.50
norm_vf  |    0.08 |    0.01 |    0.01 |    0.11
gs_f     |    1.32 |    2.23 |    0.01 |   18.91
thrust   |   -0.00    0.01   -0.00 |    0.66    0.67    0.67 |   -3.46   -3.36   -3.36 |    3.30    3.25    3.42
norm_thrust |    0.89 |    0.74 |    0.00 |    3.46
fuel     |    1.52 |    0.18 |    1.05 |    2.02
rewards  |  -10.76 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7554   0.3984   1.7135  62.2550  28.0235  17.1554
***** Episode 39068, Mean R = -11.4  Std R = 5.0  Min R = -25.0
PolicyLoss: 2.02
Policy_Entropy: 0.199
Policy_KL: 0.00473
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 1.41e+07
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.00644


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0038   0.0022   0.0081   5.5684   2.2087   2.1342
ADVA:  (18570,) (34950,) 0.5313304721030043
ADV1:  0.0017027383065833217 0.001077048309799067 0.00824091557660932 0.04936674580046185 -0.07788283195656939
ADVB:  (24353,) (34950,) 0.6967954220314735
ADV2:  0.3843762011449545 0.5130282571705871 0.5537380539166394 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7429   0.2749   1.2883  62.2550  28.0235  17.1554
***** Episode 39099, Mean R = -10.6  Std R = 5.0  Min R = -22.9
PolicyLoss: 2.4
Policy_Entropy: 0.199
Policy_KL: 0.00568
Policy_SD: 0.54
Steps: 1.16e+04
TotalSteps: 1

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2749   0.1090   0.5198  62.2550  28.0235  17.1554
***** Episode 39316, Mean R = -11.9  Std R = 5.1  Min R = -26.1
PolicyLoss: 2.11
Policy_Entropy: 0.2
Policy_KL: 0.005
Policy_SD: 0.538
Steps: 1.16e+04
TotalSteps: 1.42e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.004


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0019   5.5684   2.2087   2.1342
ADVA:  (20927,) (35218,) 0.5942131864387529
ADV1:  0.00010795093991295378 -0.000683171939990079 0.008174411108408048 0.06366503533125861 -0.06279305554472547
ADVB:  (19662,) (35218,) 0.5582940541768414
ADV2:  0.09578501724002406 0.30786539254479145 0.4307228900330631 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3380   0.1409   0.7008  62.2550  28.0235  17.1554
***** Episode 39347, Mean R = -11.3  Std R = 4.9  Min R = -23.1
PolicyLoss: 1.79
Policy_Entropy: 0.2
Policy_KL: 0.00543
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 1.

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5002   0.2498   0.8826  62.2550  28.0235  17.1554
***** Episode 39564, Mean R = -11.7  Std R = 5.5  Min R = -25.0
PolicyLoss: 1.82
Policy_Entropy: 0.199
Policy_KL: 0.00574
Policy_SD: 0.544
Steps: 1.16e+04
TotalSteps: 1.43e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.00303


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0026   5.5684   2.2087   2.1342
ADVA:  (21143,) (35025,) 0.6036545324768022
ADV1:  0.0001641868046429549 -0.00036389514119511423 0.008837434134115432 0.08436680125218343 -0.07288736331132128
ADVB:  (19089,) (35025,) 0.5450107066381157
ADV2:  0.07133595000233268 0.3129555369138697 0.4755530079523451 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4066   0.2189   0.8420  62.2550  28.0235  17.1554
***** Episode 39595, Mean R = -11.1  Std R = 5.4  Min R = -25.0
PolicyLoss: 1.86
Policy_Entropy: 0.2
Policy_KL: 0.00445
Policy_SD: 0.546
Steps: 1.18e+04
TotalSte

ADVA:  (19625,) (35237,) 0.5569429860657831
ADV1:  0.0012451973290730466 0.0004861667995576988 0.00785972613732642 0.07445717580794997 -0.07086930832255825
ADVB:  (23691,) (35237,) 0.6723330590004825
ADV2:  0.28726887837738724 0.43262046391841213 0.49260322814415297 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3097   0.1171   0.5251  62.2550  28.0235  17.1554
***** Episode 39812, Mean R = -10.7  Std R = 5.8  Min R = -26.3
PolicyLoss: 2.08
Policy_Entropy: 0.201
Policy_KL: 0.0042
Policy_SD: 0.538
Steps: 1.16e+04
TotalSteps: 1.44e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.00209


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0026   5.5684   2.2087   2.1342
ADVA:  (20990,) (35027,) 0.5992520055956834
ADV1:  0.0009536833328388401 -9.543531665522257e-05 0.008989213653218349 0.06409737276714622 -0.07086930832255825
ADVB:  (21198,) (35027,) 0.6051902817826248
ADV2:  0.16154124753899193 0.33983937640527095 0.4440106497396304

cs_angles |  0.0014 -0.0017 |  0.0719  0.0768 | -0.9673 -0.9972 |  0.9963  0.9726
optical_flow |  0.0001 -0.0000 |  0.0209  0.0219 | -0.9809 -1.0526 |  1.1432  1.2718
v_err    | -0.0111 |  0.0587 | -0.4550 |  0.1101
landing_rewards |    9.35 |    2.46 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.07 |    0.05
tracking_rewards |  -15.84 |    4.18 |  -36.66 |   -8.61
steps    |     379 |      20 |     337 |     418
***** Episode 40060, Mean R = -9.8  Std R = 4.4  Min R = -19.8
PolicyLoss: 1.72
Policy_Entropy: 0.201
Policy_KL: 0.00701
Policy_SD: 0.54
Steps: 1.18e+04
TotalSteps: 1.45e+07
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.00277


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0027   5.5684   2.2087   2.1342
ADVA:  (21010,) (35170,) 0.5973841342052886
ADV1:  0.000314899745531878 -0.0007183592027232839 0.00888271050375243 0.046134646017364084 -0.06118888103374318
ADVB:  (20147,) (35170,) 0.5728461757179414
ADV2:  0.11

thrust   |   -0.00    0.00    0.00 |    0.67    0.69    0.67 |   -3.38   -3.37   -3.46 |    3.46    3.40    3.43
norm_thrust |    0.91 |    0.74 |    0.00 |    3.46
fuel     |    1.55 |    0.20 |    1.06 |    2.39
rewards  |  -11.38 |    5.47 |  -29.45 |   -2.18
fuel_rewards |   -4.43 |    0.56 |   -6.87 |   -3.03
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    2.98 |   13.11 |    0.01 |  201.57
norm_af  |    1.76 |    0.90 |    0.06 |    3.25
norm_wf  |    0.02 |    0.01 |    0.00 |    0.05
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |    0.08   -0.03    0.08 |    1.25    0.70    1.85 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |   -0.00   -0.00   -0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.03    0.12 |    0.70

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6012   0.3351   1.3939  62.2550  28.0235  17.1554
***** Episode 40649, Mean R = -10.6  Std R = 4.6  Min R = -22.8
PolicyLoss: 2.02
Policy_Entropy: 0.203
Policy_KL: 0.00506
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 1.47e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00507


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0003   0.0014   5.5684   2.2087   2.1342
ADVA:  (18065,) (35128,) 0.5142621270781144
ADV1:  0.0004493995735814927 0.0002228825343631912 0.0067669182075127555 0.04949943328861148 -0.08984639241363562
ADVB:  (21177,) (35128,) 0.6028524254156229
ADV2:  0.19035735413689062 0.4084406464894705 0.5372370250469725 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7362   0.3458   1.5714  62.2550  28.0235  17.1554
Update Cnt = 1310    ET =   1579.5   Stats:  Mean, Std, Min, Max
r_f      |   -9.95   15.40   -3.51 |  174.75  176.78  207.36 | -398.29 -373.94 -384.91 |  389.96 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9019   0.3823   1.7980  62.2550  28.0235  17.1554
***** Episode 40897, Mean R = -12.4  Std R = 4.5  Min R = -22.6
PolicyLoss: 1.96
Policy_Entropy: 0.204
Policy_KL: 0.00589
Policy_SD: 0.544
Steps: 1.18e+04
TotalSteps: 1.48e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.00235


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0005   5.5684   2.2087   2.1342
ADVA:  (18646,) (35377,) 0.527065607598157
ADV1:  0.0011573410013148936 0.00038557729933025233 0.00851262809535495 0.057466852538492486 -0.06743594441793804
ADVB:  (23836,) (35377,) 0.6737710942137547
ADV2:  0.2983738494755123 0.45101893494374107 0.5164503585821533 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5894   0.2458   1.1080  62.2550  28.0235  17.1554
***** Episode 40928, Mean R = -12.0  Std R = 5.0  Min R = -22.3
PolicyLoss: 2.15
Policy_Entropy: 0.204
Policy_KL: 0.00515
Policy_SD: 0.543
Steps: 1.18e+04
TotalSte

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.3907   4.0528  14.0961  62.2550  28.0235  17.1554
***** Episode 41145, Mean R = -10.9  Std R = 5.1  Min R = -30.3
PolicyLoss: 2.51
Policy_Entropy: 0.204
Policy_KL: 0.00587
Policy_SD: 0.53
Steps: 1.15e+04
TotalSteps: 1.49e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.0034


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0021   0.0088   5.5684   2.2087   2.1342
ADVA:  (20406,) (35072,) 0.5818316605839416
ADV1:  0.0 -0.0009846813395664148 0.007755678225605596 0.04386375813760143 -0.09643684320998741
ADVB:  (16559,) (35072,) 0.4721430200729927
ADV2:  0.0 0.29169320473345767 0.47286519190460874 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7260   0.7497   3.5147  62.2550  28.0235  17.1554
***** Episode 41176, Mean R = -11.4  Std R = 6.5  Min R = -32.5
PolicyLoss: 1.98
Policy_Entropy: 0.203
Policy_KL: 0.00586
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 1.49e+07
VF_0_ExplainedVarNew:

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.0061   1.0632   4.1206  62.2550  28.0235  17.1554
***** Episode 41393, Mean R = -9.9  Std R = 4.2  Min R = -19.1
PolicyLoss: 2.4
Policy_Entropy: 0.204
Policy_KL: 0.00646
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 1.5e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00196


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0048   5.5684   2.2087   2.1342
ADVA:  (20296,) (35327,) 0.5745180739943953
ADV1:  0.0 -0.0005915033631111447 0.007597497116970098 0.05008827406549893 -0.06410683467384078
ADVB:  (17645,) (35327,) 0.49947632122738983
ADV2:  0.0 0.2823210296461328 0.43860606565265087 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7846   0.3556   1.6277  62.2550  28.0235  17.1554
***** Episode 41424, Mean R = -10.4  Std R = 6.2  Min R = -28.9
PolicyLoss: 1.8
Policy_Entropy: 0.204
Policy_KL: 0.00654
Policy_SD: 0.537
Steps: 1.16e+04
TotalSteps: 1.5e+07
VF_0_ExplainedVarNew: 0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.4781   1.9039   7.8942  80.7417  29.0566  22.3960
***** Episode 41641, Mean R = -10.4  Std R = 4.7  Min R = -22.7
PolicyLoss: 2.31
Policy_Entropy: 0.204
Policy_KL: 0.014
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 1.51e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.00243


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0054   5.5684   2.2087   2.1342
ADVA:  (21536,) (35367,) 0.6089292278112365
ADV1:  2.2170975396372354e-05 -0.00039003800318490877 0.008486231420983838 0.07791475876266157 -0.09981050891791554
ADVB:  (18614,) (35367,) 0.526309836853564
ADV2:  0.05285795520934976 0.330038779587653 0.48088725341693445 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5530   0.2339   0.9953  80.7417  29.0566  22.3960
***** Episode 41672, Mean R = -11.5  Std R = 6.2  Min R = -32.6
PolicyLoss: 2.01
Policy_Entropy: 0.204
Policy_KL: 0.0129
Policy_SD: 0.536
Steps: 1.16e+04
TotalStep

w        |   -0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.01    0.02 |    0.67    1.89 |   -1.41   -3.14 |    1.53    3.13
w_f      |   -0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.04   -0.02 |    0.04    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.25 |    0.00 |    1.40
seeker_angles |    0.00    0.00 |    0.08    0.07 |   -0.98   -0.99 |    0.98    0.98
cs_angles |  0.0027  0.0008 |  0.0774  0.0747 | -0.9787 -0.9853 |  0.9767  0.9816
optical_flow |  0.0001  0.0000 |  0.0211  0.0215 | -0.9198 -1.0333 |  1.2144  1.2617
v_err    | -0.0113 |  0.0600 | -0.4529 |  0.1153
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.03
tracking_rewards |  -15.92 |    4.55 |  -38.49 |   -8.73
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6674   0.2544   1.1543  80.7417  29.0566  22.3960
Update Cnt = 1360    ET =   1459.7   Stats:  Mean, Std, Min, Max
r_f      |   -9.00    4.22    0.51 |  180.73  166.79  211.90 | -384.95 -387.78 -374.40 |  371.45  385.45  397.95
v_f      |    0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.11    0.09    0.12
r_i      |  -13.80   14.56   24.71 |  695.44  632.77  773.90 |-1279.95-1274.33-1315.73 | 1275.88 1347.53 1264.69
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.16 |    0.07 |    0.02 |    0.43
norm_vf  |    0.08 |    0.02 |    0.04 |    0.13
gs_f     |    1.26 |    1.79 |    0.01 |   19.60
thrust   |   -0.00    0.00    0.00 |    0.68    0.68    0.68 |   -3.41   -3.45   -3.45 |    3.31    3.35    3.44
norm_thrust |    0.91 |    0.74 |    0.00 |    3.46
fuel     |    1.53 |    0.20 |    1.03 |    2.57
rewards  |  -10.95 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6642   0.2261   1.2388  80.7417  29.0566  22.3960
***** Episode 42478, Mean R = -11.0  Std R = 4.6  Min R = -24.1
PolicyLoss: 1.97
Policy_Entropy: 0.205
Policy_KL: 0.00581
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 1.54e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.0043


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0021   5.5684   2.2087   2.1342
ADVA:  (19722,) (34846,) 0.5659760087241004
ADV1:  0.0 -0.00036946078387891914 0.007110899227144989 0.04954788909335764 -0.07057332870096139
ADVB:  (18693,) (34846,) 0.5364460770246227
ADV2:  0.06772042025544951 0.3309788067175781 0.4950070448525717 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4005   0.1659   0.8039  80.7417  29.0566  22.3960
***** Episode 42509, Mean R = -11.3  Std R = 5.0  Min R = -26.1
PolicyLoss: 1.97
Policy_Entropy: 0.205
Policy_KL: 0.00555
Policy_SD: 0.544
Steps: 1.14e+04
TotalSteps: 1.54e+07
VF_0_

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6490   0.2611   1.1401  80.7417  29.0566  22.3960
***** Episode 42726, Mean R = -10.3  Std R = 4.7  Min R = -22.4
PolicyLoss: 2.25
Policy_Entropy: 0.204
Policy_KL: 0.00726
Policy_SD: 0.544
Steps: 1.18e+04
TotalSteps: 1.55e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00612


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0015   5.5684   2.2087   2.1342
ADVA:  (19525,) (35057,) 0.5569501098211485
ADV1:  0.00019149406183711394 2.7409902926943843e-05 0.007119724341604773 0.051774771099988876 -0.09707301714451164
ADVB:  (19405,) (35057,) 0.5535271129874205
ADV2:  0.09889358426822274 0.36424231892655184 0.5242015077209056 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3869   0.1314   0.6662  80.7417  29.0566  22.3960
***** Episode 42757, Mean R = -11.7  Std R = 6.4  Min R = -29.5
PolicyLoss: 2.1
Policy_Entropy: 0.204
Policy_KL: 0.00669
Policy_SD: 0.551
Steps: 1.17e+04
TotalS

ADVA:  (17166,) (35249,) 0.4869925387954268
ADV1:  0.0009251151161981871 0.0008107855716370184 0.0056648056618850565 0.03901278060141866 -0.06892942486475978
ADVB:  (23054,) (35249,) 0.6540327385174047
ADV2:  0.33626536143751773 0.5285675240167289 0.6125966416040521 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8471   0.2999   1.4308  80.7417  29.0566  22.3960
***** Episode 42974, Mean R = -10.5  Std R = 4.1  Min R = -20.2
PolicyLoss: 2.58
Policy_Entropy: 0.205
Policy_KL: 0.00624
Policy_SD: 0.541
Steps: 1.18e+04
TotalSteps: 1.56e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00543


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0012   0.0045   5.5684   2.2087   2.1342
ADVA:  (19802,) (35127,) 0.5637259088450479
ADV1:  0.0 -0.0004619250025111522 0.006350362264867759 0.06962064379790789 -0.07035709974242627
ADVB:  (17249,) (35127,) 0.4910467731374726
ADV2:  0.0 0.2980129740492085 0.4684488716770092 3.0 0.0
Policy  Gradients: u/sd/M

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0013   5.5684   2.2087   2.1342
ADVA:  (19284,) (34972,) 0.5514125586183233
ADV1:  0.0008759936614558348 0.0007348567589060831 0.006480237316287695 0.07640246849858728 -0.05393543693866881
ADVB:  (20931,) (34972,) 0.5985073773304358
ADV2:  0.1932611317101144 0.42556847090768823 0.5673640929098935 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2578   0.6128   2.6313  80.7417  29.0566  22.3960
***** Episode 43222, Mean R = -10.4  Std R = 5.2  Min R = -24.1
PolicyLoss: 2.27
Policy_Entropy: 0.205
Policy_KL: 0.0067
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 1.56e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.00308


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0038   0.0024   0.0087   5.5684   2.2087   2.1342
ADVA:  (19303,) (35230,) 0.5479137099063298
ADV1:  0.0 -0.000723594328822139 0.007332552937858608 0.07640246849858728 -0.12392528934923708
ADVB:  (18280,) (35230,) 

theta_cv |    0.22 |    0.25 |    0.00 |    1.55
seeker_angles |    0.00   -0.00 |    0.07    0.08 |   -0.99   -0.98 |    1.00    0.98
cs_angles |  0.0034 -0.0005 |  0.0715  0.0755 | -0.9905 -0.9799 |  0.9988  0.9828
optical_flow | -0.0000 -0.0000 |  0.0218  0.0216 | -1.0187 -0.8651 |  1.0568  1.1085
v_err    | -0.0111 |  0.0598 | -0.4537 |  0.1013
landing_rewards |    9.81 |    1.38 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.07 |    0.03
tracking_rewards |  -15.41 |    4.15 |  -34.18 |   -9.20
steps    |     379 |      20 |     337 |     418
***** Episode 43470, Mean R = -10.2  Std R = 5.2  Min R = -27.6
PolicyLoss: 2
Policy_Entropy: 0.205
Policy_KL: 0.00581
Policy_SD: 0.547
Steps: 1.18e+04
TotalSteps: 1.57e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00256


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0030   5.5684   2.2087   2.1342
ADVA:  (19239,) (35213,) 0.5463607190526226
ADV1:  0.0 -0.0001387391695866466 0

attitude |    0.03    0.03    0.10 |    1.18    0.68    1.85 |   -3.14   -1.54   -3.14 |    3.14    1.57    3.14
w        |   -0.00   -0.00   -0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.04    0.17 |    0.69    1.84 |   -1.31   -3.11 |    1.52    3.14
w_f      |   -0.00   -0.00    0.00 |    0.02    0.01    0.01 |   -0.05   -0.04   -0.02 |    0.04    0.04    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.25 |    0.00 |    1.45
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.96   -0.99 |    1.00    1.00
cs_angles |  0.0037  0.0026 |  0.0731  0.0734 | -0.9605 -0.9950 |  0.9999  0.9955
optical_flow |  0.0001  0.0000 |  0.0223  0.0217 | -1.1924 -1.0045 |  1.1989  1.0923
v_err    | -0.0111 |  0.0597 | -0.4523 |  0.0960
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2886   0.1061   0.5733  80.7417  29.0566  22.3960
Update Cnt = 1420    ET =   1688.5   Stats:  Mean, Std, Min, Max
r_f      |  -10.74   16.69  -19.83 |  185.78  157.21  203.25 | -390.48 -348.83 -392.13 |  384.78  375.98  394.83
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.04 |   -0.09   -0.11   -0.11 |    0.10    0.09    0.09
r_i      |  -28.63   47.11  -66.84 |  671.92  637.67  783.66 |-1320.43-1310.53-1291.29 | 1236.41 1319.05 1281.30
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.08    0.09    0.09
norm_rf  |    0.16 |    0.07 |    0.01 |    0.39
norm_vf  |    0.07 |    0.01 |    0.04 |    0.11
gs_f     |    1.47 |    2.97 |    0.00 |   39.92
thrust   |    0.00    0.01    0.00 |    0.67    0.69    0.68 |   -3.42   -3.43   -3.32 |    3.46    3.39    3.40
norm_thrust |    0.92 |    0.74 |    0.00 |    3.46
fuel     |    1.55 |    0.21 |    1.10 |    2.54
rewards  |  -10.51 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3903   0.1382   0.7863  80.7417  29.0566  22.3960
***** Episode 44338, Mean R = -11.0  Std R = 5.2  Min R = -30.8
PolicyLoss: 2.38
Policy_Entropy: 0.206
Policy_KL: 0.00736
Policy_SD: 0.555
Steps: 1.19e+04
TotalSteps: 1.61e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00164


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0015   5.5684   2.2087   2.1342
ADVA:  (18813,) (35042,) 0.5368700416642885
ADV1:  0.0 -0.00014366687154711814 0.0056751319104244745 0.0337842961697552 -0.06095083791102268
ADVB:  (18102,) (35042,) 0.5165801038753496
ADV2:  0.03396516362171084 0.3542203250472057 0.5301052655312312 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3324   0.1343   0.6786  80.7417  29.0566  22.3960
***** Episode 44369, Mean R = -10.3  Std R = 5.2  Min R = -28.1
PolicyLoss: 2.17
Policy_Entropy: 0.207
Policy_KL: 0.00663
Policy_SD: 0.552
Steps: 1.15e+04
TotalSteps: 1.61e+07
VF_0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5010   0.2220   0.9685  80.7417  29.0566  22.3960
***** Episode 44586, Mean R = -10.7  Std R = 5.2  Min R = -24.5
PolicyLoss: 2.58
Policy_Entropy: 0.206
Policy_KL: 0.00631
Policy_SD: 0.546
Steps: 1.18e+04
TotalSteps: 1.62e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00155


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0016   0.0060   5.5684   2.2087   2.1342
ADVA:  (17697,) (35370,) 0.5003392705682782
ADV1:  0.0004910512324558039 0.0005577443801499306 0.00594846090331437 0.04590098495232142 -0.05074697253926984
ADVB:  (21796,) (35370,) 0.6162284421826406
ADV2:  0.27417696835522987 0.5195629984154959 0.6354363517173542 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4978   0.1828   0.7893  80.7417  29.0566  22.3960
***** Episode 44617, Mean R = -11.0  Std R = 5.8  Min R = -29.6
PolicyLoss: 2.68
Policy_Entropy: 0.206
Policy_KL: 0.00548
Policy_SD: 0.55
Steps: 1.17e+04
TotalSteps

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5408   0.2181   1.1572  80.7417  29.0566  22.3960
***** Episode 44834, Mean R = -10.3  Std R = 5.0  Min R = -26.5
PolicyLoss: 2.31
Policy_Entropy: 0.207
Policy_KL: 0.00426
Policy_SD: 0.538
Steps: 1.14e+04
TotalSteps: 1.63e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.00185


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   5.5684   2.2087   2.1342
ADVA:  (20347,) (35092,) 0.5798187621110225
ADV1:  0.00011411387298916609 -0.00018930069819992937 0.00691408179097726 0.07302837446452815 -0.10816847784768202
ADVB:  (18839,) (35092,) 0.536846004787416
ADV2:  0.0664790553692137 0.32138785228574174 0.4619427048451969 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5682   0.2161   1.0951  80.7417  29.0566  22.3960
***** Episode 44865, Mean R = -9.5  Std R = 4.5  Min R = -24.0
PolicyLoss: 1.9
Policy_Entropy: 0.207
Policy_KL: 0.00521
Policy_SD: 0.537
Steps: 1.18e+04
TotalStep

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8527   0.4488   1.6572  80.7417  29.0566  22.3960
***** Episode 45082, Mean R = -9.8  Std R = 4.2  Min R = -20.4
PolicyLoss: 2.21
Policy_Entropy: 0.206
Policy_KL: 0.00565
Policy_SD: 0.558
Steps: 1.18e+04
TotalSteps: 1.64e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00138


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0023   5.5684   2.2087   2.1342
ADVA:  (18807,) (35285,) 0.5330026923621936
ADV1:  0.0003707831970997423 0.00018748784269773645 0.0069151976834384895 0.05956176990237155 -0.09762790093372953
ADVB:  (19562,) (35285,) 0.5543998866373814
ADV2:  0.10906647941856265 0.37700980369135423 0.5248862784680041 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5604   0.2225   1.0432  80.7417  29.0566  22.3960
***** Episode 45113, Mean R = -11.1  Std R = 5.4  Min R = -26.4
PolicyLoss: 2.15
Policy_Entropy: 0.207
Policy_KL: 0.00639
Policy_SD: 0.556
Steps: 1.17e+04
TotalSt

***** Episode 45330, Mean R = -9.0  Std R = 4.1  Min R = -21.0
PolicyLoss: 1.9
Policy_Entropy: 0.207
Policy_KL: 0.00541
Policy_SD: 0.545
Steps: 1.18e+04
TotalSteps: 1.64e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00763


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0032   5.5684   2.2087   2.1342
ADVA:  (19079,) (35319,) 0.540190832130015
ADV1:  0.0008586614661471887 0.000719176443292863 0.007374196295586371 0.047793415108681736 -0.07958528104911239
ADVB:  (21562,) (35319,) 0.6104929358135848
ADV2:  0.2024487677250212 0.42401687891087664 0.536415010250495 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5859   0.2323   1.0539  80.7417  29.0566  22.3960
***** Episode 45361, Mean R = -10.9  Std R = 6.5  Min R = -30.9
PolicyLoss: 2.2
Policy_Entropy: 0.208
Policy_KL: 0.00558
Policy_SD: 0.547
Steps: 1.18e+04
TotalSteps: 1.65e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00793


ValFun  Gradients

attitude |   -0.04    0.01    0.12 |    1.23    0.60    1.91 |   -3.14   -1.54   -3.14 |    3.14    1.54    3.14
w        |   -0.00   -0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.00    0.20 |    0.59    1.90 |   -1.51   -3.13 |    1.39    3.14
w_f      |   -0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.05   -0.03 |    0.03    0.04    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.27 |    0.00 |    1.47
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.97   -0.99 |    0.99    0.91
cs_angles |  0.0036  0.0012 |  0.0791  0.0803 | -0.9737 -0.9900 |  0.9853  0.9139
optical_flow |  0.0000 -0.0001 |  0.0221  0.0185 | -0.8997 -1.1713 |  1.1658  0.9175
v_err    | -0.0110 |  0.0590 | -0.4522 |  0.1013
landing_rewards |    9.77 |    1.49 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5362   0.7930   3.2409  80.7417  29.0566  22.3960
Update Cnt = 1480    ET =   1559.9   Stats:  Mean, Std, Min, Max
r_f      |   -3.71   -2.91   -0.55 |  189.82  165.73  198.54 | -391.40 -350.24 -389.59 |  399.09  392.75  384.86
v_f      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.12    0.09    0.10
r_i      |  -36.83   -2.49  -12.44 |  697.32  659.11  755.70 |-1320.45-1287.59-1331.60 | 1294.38 1299.69 1302.89
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.16 |    0.06 |    0.01 |    0.41
norm_vf  |    0.08 |    0.01 |    0.03 |    0.15
gs_f     |    1.31 |    2.07 |    0.01 |   22.82
thrust   |   -0.00    0.00   -0.01 |    0.68    0.70    0.67 |   -3.45   -3.42   -3.45 |    3.40    3.42    3.42
norm_thrust |    0.93 |    0.74 |    0.00 |    3.46
fuel     |    1.59 |    0.22 |    1.17 |    2.28
rewards  |  -10.98 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5764   0.2016   1.0374  80.7417  29.0566  22.3960
***** Episode 46198, Mean R = -11.4  Std R = 6.2  Min R = -28.7
PolicyLoss: 2.19
Policy_Entropy: 0.208
Policy_KL: 0.00513
Policy_SD: 0.552
Steps: 1.14e+04
TotalSteps: 1.68e+07
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.00299


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0022   5.5684   2.2087   2.1342
ADVA:  (19337,) (34708,) 0.5571338020053014
ADV1:  0.0 -0.0003511313161410597 0.007416825131531482 0.04639589327944704 -0.05330606931349265
ADVB:  (19154,) (34708,) 0.5518612423648727
ADV2:  0.091955971536739 0.3471315079493703 0.47960573869868045 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6532   0.2027   1.1172  80.7417  29.0566  22.3960
***** Episode 46229, Mean R = -11.0  Std R = 5.5  Min R = -28.6
PolicyLoss: 1.99
Policy_Entropy: 0.208
Policy_KL: 0.00584
Policy_SD: 0.557
Steps: 1.17e+04
TotalSteps: 1.68e+07
VF_0_

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4605   0.2138   0.9290  80.7417  29.0566  22.3960
***** Episode 46446, Mean R = -10.3  Std R = 5.7  Min R = -22.6
PolicyLoss: 2.14
Policy_Entropy: 0.209
Policy_KL: 0.00663
Policy_SD: 0.549
Steps: 1.17e+04
TotalSteps: 1.69e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00304


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0015   5.5684   2.2087   2.1342
ADVA:  (18417,) (35120,) 0.5244020501138952
ADV1:  0.0002631884913226277 0.0002501131531490456 0.0075568038333743495 0.05460899511151346 -0.09104239041560447
ADVB:  (20615,) (35120,) 0.5869874715261959
ADV2:  0.1569442612145423 0.41008205150073423 0.5525528262876933 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6130   0.3122   1.3231  80.7417  29.0566  22.3960
***** Episode 46477, Mean R = -10.1  Std R = 4.8  Min R = -24.6
PolicyLoss: 2.21
Policy_Entropy: 0.209
Policy_KL: 0.00497
Policy_SD: 0.551
Steps: 1.16e+04
TotalSt

ADVA:  (18424,) (35213,) 0.5232158577797973
ADV1:  0.00027685281821780445 0.00013927671055903078 0.006061137182174685 0.04566186602937011 -0.05299624729682973
ADVB:  (20516,) (35213,) 0.5826257348138472
ADV2:  0.15128205510466367 0.39773397320158144 0.5452880704903029 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3467   0.1362   0.5838  80.7417  29.0566  22.3960
***** Episode 46694, Mean R = -10.9  Std R = 6.3  Min R = -32.8
PolicyLoss: 2.15
Policy_Entropy: 0.209
Policy_KL: 0.00703
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 1.7e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00835


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0011   0.0038   5.5684   2.2087   2.1342
ADVA:  (19622,) (35154,) 0.5581726119360528
ADV1:  0.0 -0.0006655853298181601 0.007235878570599996 0.05918911569281132 -0.08085390379352453
ADVB:  (18254,) (35154,) 0.519258121408659
ADV2:  0.03217172121987828 0.29844050372247477 0.45308110234797466 3.0 0.0
Policy

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0014   0.0050   5.5684   2.2087   2.1342
ADVA:  (19009,) (35356,) 0.5376456612738998
ADV1:  0.0007673587342753013 -0.00011878964199711108 0.008344796076293074 0.054705141696841486 -0.09428709128402157
ADVB:  (22264,) (35356,) 0.6297092431270506
ADV2:  0.25013456202112583 0.40188742706224373 0.4765346179070906 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3097   0.5976   2.9214  80.7417  29.0566  22.3960
***** Episode 46942, Mean R = -10.6  Std R = 5.3  Min R = -23.7
PolicyLoss: 2.02
Policy_Entropy: 0.209
Policy_KL: 0.0059
Policy_SD: 0.551
Steps: 1.18e+04
TotalSteps: 1.71e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.00633


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0017   5.5684   2.2087   2.1342
ADVA:  (19073,) (35270,) 0.5407711936489935
ADV1:  0.0016434695492645191 0.0007045115090006307 0.008107872007586022 0.0732403183418221 -0.09428709128402157
ADVB

seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.97 |    0.98    1.00
cs_angles |  0.0031  0.0024 |  0.0767  0.0763 | -0.9841 -0.9694 |  0.9843  0.9984
optical_flow |  0.0001 -0.0001 |  0.0199  0.0198 | -1.1361 -0.9986 |  0.9618  1.0705
v_err    | -0.0110 |  0.0590 | -0.4529 |  0.1042
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -15.91 |    4.64 |  -37.52 |   -8.72
steps    |     379 |      20 |     336 |     420
***** Episode 47190, Mean R = -11.4  Std R = 5.0  Min R = -21.4
PolicyLoss: 1.96
Policy_Entropy: 0.21
Policy_KL: 0.00667
Policy_SD: 0.549
Steps: 1.19e+04
TotalSteps: 1.72e+07
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.0017


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0012   0.0050   5.5684   2.2087   2.1342
ADVA:  (19395,) (35351,) 0.5486407739526463
ADV1:  0.00023143193481862858 8.407580600210208e-05 0.007674337561081216 0.0703937

attitude |   -0.00   -0.04   -0.09 |    1.22    0.67    1.81 |   -3.14   -1.54   -3.14 |    3.14    1.56    3.14
w        |    0.00    0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.04   -0.12 |    0.67    1.81 |   -1.48   -3.13 |    1.52    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.03   -0.02 |    0.04    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.26 |    0.00 |    1.73
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.99   -0.98 |    0.94    0.99
cs_angles |  0.0021  0.0002 |  0.0736  0.0784 | -0.9911 -0.9821 |  0.9409  0.9946
optical_flow | -0.0001 -0.0001 |  0.0203  0.0188 | -0.9688 -1.1171 |  0.9964  1.0657
v_err    | -0.0112 |  0.0595 | -0.4572 |  0.1065
landing_rewards |    9.61 |    1.93 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5737   0.2710   1.1032  80.7417  29.0566  22.3960
Update Cnt = 1540    ET =   1380.6   Stats:  Mean, Std, Min, Max
r_f      |   -4.77    0.37    3.98 |  193.15  169.72  196.48 | -384.73 -385.99 -376.75 |  394.98  395.48  399.11
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.11   -0.09   -0.10 |    0.10    0.10    0.12
r_i      |    6.59    5.35   21.37 |  702.18  652.13  756.55 |-1310.97-1319.56-1312.63 | 1343.43 1325.60 1292.13
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.10    0.10
norm_rf  |    0.16 |    0.07 |    0.03 |    0.38
norm_vf  |    0.08 |    0.01 |    0.02 |    0.13
gs_f     |    1.27 |    1.89 |    0.00 |   19.70
thrust   |    0.00   -0.00   -0.00 |    0.67    0.69    0.68 |   -3.43   -3.46   -3.38 |    3.42    3.42    3.37
norm_thrust |    0.92 |    0.74 |    0.00 |    3.46
fuel     |    1.56 |    0.21 |    1.16 |    2.28
rewards  |  -10.85 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4082   0.1611   0.6740  80.7417  29.0566  22.3960
***** Episode 48058, Mean R = -10.5  Std R = 4.8  Min R = -21.4
PolicyLoss: 1.83
Policy_Entropy: 0.211
Policy_KL: 0.00603
Policy_SD: 0.541
Steps: 1.15e+04
TotalSteps: 1.75e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.000734


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0015   5.5684   2.2087   2.1342
ADVA:  (18747,) (34992,) 0.5357510288065843
ADV1:  0.0 -0.0001525543239190001 0.0067044573147017564 0.053024515513177495 -0.05509114383792456
ADVB:  (17543,) (34992,) 0.5013431641518061
ADV2:  0.0019317660598578076 0.3183295518112219 0.5075581548727341 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4468   0.1654   0.8406  80.7417  29.0566  22.3960
***** Episode 48089, Mean R = -9.2  Std R = 4.5  Min R = -24.0
PolicyLoss: 1.98
Policy_Entropy: 0.211
Policy_KL: 0.00577
Policy_SD: 0.545
Steps: 1.16e+04
TotalSteps: 1.75e+07


Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7710   0.8610   3.4590  80.7417  29.0566  22.3960
***** Episode 48306, Mean R = -10.4  Std R = 4.7  Min R = -23.4
PolicyLoss: 2.06
Policy_Entropy: 0.212
Policy_KL: 0.00823
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps: 1.76e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.00279


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0051   5.5684   2.2087   2.1342
ADVA:  (19604,) (35193,) 0.5570425936976103
ADV1:  0.0008171392990277832 0.0002112366107206356 0.008213737524144348 0.05231486313142769 -0.07841259228938435
ADVB:  (21557,) (35193,) 0.6125365839797687
ADV2:  0.2275557467426062 0.42434124671075496 0.5231666243969034 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9394   0.8902   3.6564  80.7417  29.0566  22.3960
***** Episode 48337, Mean R = -10.3  Std R = 4.9  Min R = -19.6
PolicyLoss: 2.17
Policy_Entropy: 0.212
Policy_KL: 0.00698
Policy_SD: 0.538
Steps: 1.18e+04
TotalSte

ADVA:  (21282,) (34924,) 0.6093803688008247
ADV1:  5.206181905416385e-05 -0.00013424570631662564 0.008044397704152946 0.06042605102939419 -0.07403629591359628
ADVB:  (17539,) (34924,) 0.5022047875386554
ADV2:  0.004811245763266681 0.3199007986667008 0.4868927371034866 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5513   0.2532   1.2386  80.7417  29.0566  22.3960
***** Episode 48554, Mean R = -10.6  Std R = 5.0  Min R = -24.4
PolicyLoss: 1.99
Policy_Entropy: 0.212
Policy_KL: 0.00574
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 1.77e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.000725


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0042   0.0026   0.0095   5.5684   2.2087   2.1342
ADVA:  (22601,) (35267,) 0.6408540561998469
ADV1:  -2.3278933795983087e-05 -0.0018708171072880453 0.009016724059544482 0.07730716675747201 -0.07403629591359628
ADVB:  (15517,) (35267,) 0.4399863895426319
ADV2:  0.0 0.2076256785016023 0.36241954397139997 3.0 0.0


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0014   5.5684   2.2087   2.1342
ADVA:  (19700,) (35290,) 0.5582317937092661
ADV1:  9.259042348026651e-05 -0.00025795588689492516 0.0070103514614065455 0.06884982540286183 -0.0736464749788629
ADVB:  (19159,) (35290,) 0.5429016718617172
ADV2:  0.07507784886563307 0.33212369606469955 0.4832060364145443 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3716   0.1224   0.6486  80.7417  29.0566  22.3960
***** Episode 48802, Mean R = -9.4  Std R = 4.2  Min R = -20.4
PolicyLoss: 1.91
Policy_Entropy: 0.213
Policy_KL: 0.00587
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 1.78e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.00179


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0020   5.5684   2.2087   2.1342
ADVA:  (20774,) (35193,) 0.5902878413320831
ADV1:  0.0 -0.0007876041986271482 0.007984979630302057 0.04878229687621999 -0.11232767421363493
ADVB:  (18735,) (3519

cs_angles |  0.0019  0.0001 |  0.0741  0.0752 | -0.9937 -0.9866 |  0.9987  0.9923
optical_flow |  0.0001 -0.0000 |  0.0201  0.0203 | -1.0980 -1.3739 |  1.0406  1.1765
v_err    | -0.0111 |  0.0589 | -0.4551 |  0.1017
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.01
tracking_rewards |  -15.73 |    4.28 |  -42.16 |   -9.07
steps    |     379 |      20 |     334 |     415
***** Episode 49050, Mean R = -10.9  Std R = 7.0  Min R = -38.7
PolicyLoss: 1.92
Policy_Entropy: 0.214
Policy_KL: 0.00556
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 1.79e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.00236


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0014   5.5684   2.2087   2.1342
ADVA:  (20825,) (35402,) 0.588243602056381
ADV1:  0.00041518141566830764 -0.00022600582205665464 0.008401659061092887 0.06452612786565409 -0.08457617160005859
ADVB:  (20468,) (35402,) 0.5781594260211288
ADV2:  

thrust   |   -0.01   -0.00   -0.00 |    0.67    0.70    0.69 |   -3.42   -3.45   -3.40 |    3.41    3.33    3.45
norm_thrust |    0.92 |    0.75 |    0.00 |    3.46
fuel     |    1.57 |    0.21 |    1.12 |    2.31
rewards  |  -11.35 |    5.96 |  -38.63 |   -3.12
fuel_rewards |   -4.49 |    0.61 |   -6.62 |   -3.20
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    2.89 |   12.76 |    0.01 |  153.46
norm_af  |    1.72 |    0.90 |    0.13 |    3.39
norm_wf  |    0.02 |    0.01 |    0.00 |    0.06
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |   -0.03   -0.01   -0.02 |    1.17    0.67    1.83 |   -3.14   -1.57   -3.14 |    3.14    1.54    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.02   -0.03 |    0.67

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.8177   2.5657  10.5862  80.7417  29.0566  22.3960
***** Episode 49639, Mean R = -10.9  Std R = 5.9  Min R = -26.2
PolicyLoss: 2.52
Policy_Entropy: 0.213
Policy_KL: 0.00603
Policy_SD: 0.533
Steps: 1.18e+04
TotalSteps: 1.81e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.00327


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0022   5.5684   2.2087   2.1342
ADVA:  (19862,) (35191,) 0.5644056719047483
ADV1:  -0.0008151027226958197 -0.0021581990111445995 0.007549183401900809 0.05565888474505795 -0.07726087658476116
ADVB:  (15019,) (35191,) 0.4267852575942713
ADV2:  0.0 0.21872825770754326 0.3803272053432572 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5613   0.2559   1.1069  80.7417  29.0566  22.3960
Update Cnt = 1600    ET =   1386.7   Stats:  Mean, Std, Min, Max
r_f      |  -10.56   -9.36    0.77 |  178.67  160.30  210.13 | -387.68 -367.76 -376.34 |  388.01  374.07  377.8

ADVA:  (20891,) (34985,) 0.5971416321280549
ADV1:  0.0018743327161123086 0.0009600921487614905 0.00819080644939373 0.043149114054841925 -0.06250859757256866
ADVB:  (23111,) (34985,) 0.6605973988852365
ADV2:  0.30925267447705657 0.4517627322938296 0.5105095221491619 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4173   0.1799   0.6923  80.7417  29.0566  22.3960
***** Episode 49887, Mean R = -10.5  Std R = 4.8  Min R = -21.8
PolicyLoss: 2.13
Policy_Entropy: 0.214
Policy_KL: 0.0045
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 1.82e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00453


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0015   0.0055   5.5684   2.2087   2.1342
ADVA:  (19415,) (35042,) 0.5540494264026026
ADV1:  0.0004894091623476914 0.0001981415759511485 0.007381681828470233 0.04546110946531562 -0.07029146649256979
ADVB:  (20687,) (35042,) 0.5903487243878774
ADV2:  0.19034454290488995 0.4212234780459746 0.5413748632651197 3

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0029   5.5684   2.2087   2.1342
ADVA:  (21299,) (35350,) 0.6025176803394625
ADV1:  0.00045467694882465014 -0.0006712869275851713 0.008505137594378766 0.04906135096780506 -0.0831791018037098
ADVB:  (20121,) (35350,) 0.5691937765205092
ADV2:  0.1112096271056712 0.2908403753466504 0.39610359912077 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3639   0.1465   0.7512  80.7417  29.0566  22.3960
***** Episode 50135, Mean R = -11.3  Std R = 5.2  Min R = -22.5
PolicyLoss: 1.59
Policy_Entropy: 0.214
Policy_KL: 0.00486
Policy_SD: 0.526
Steps: 1.19e+04
TotalSteps: 1.83e+07
VF_0_ExplainedVarNew: 0.954
VF_0_ExplainedVarOld: 0.951
VF_0_Loss : 0.00194


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0012   5.5684   2.2087   2.1342
ADVA:  (22372,) (35286,) 0.6340191577396135
ADV1:  0.0001780180822931396 -0.0008898786469850055 0.00918078491427154 0.048638801387434816 -0.1198980180038196
ADVB:  (

***** Episode 50352, Mean R = -10.1  Std R = 5.3  Min R = -23.0
PolicyLoss: 1.73
Policy_Entropy: 0.213
Policy_KL: 0.00705
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 1.83e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.00326


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0035   5.5684   2.2087   2.1342
ADVA:  (19597,) (35241,) 0.5560852416219744
ADV1:  0.0003130311327504461 -0.0003116627541538182 0.007769129939936245 0.06706481493455269 -0.06942164185108057
ADVB:  (21257,) (35241,) 0.603189466814222
ADV2:  0.16042316094791725 0.3523378800294212 0.4621621331159412 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9731   0.4267   2.0960  80.7417  29.0566  22.3960
***** Episode 50383, Mean R = -10.0  Std R = 5.3  Min R = -28.1
PolicyLoss: 1.82
Policy_Entropy: 0.213
Policy_KL: 0.00881
Policy_SD: 0.538
Steps: 1.17e+04
TotalSteps: 1.84e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.0019


ValFun  Gradie

***** Episode 50600, Mean R = -11.2  Std R = 6.3  Min R = -33.3
PolicyLoss: 1.45
Policy_Entropy: 0.215
Policy_KL: 0.0072
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 1.84e+07
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.00321


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0025   5.5684   2.2087   2.1342
ADVA:  (21737,) (34990,) 0.6212346384681338
ADV1:  0.000862987756706431 -0.000135157228610278 0.008745149941815704 0.046693201549953545 -0.07451892382704767
ADVB:  (20343,) (34990,) 0.5813946841954845
ADV2:  0.13920005464302088 0.33266805321346393 0.43376629578117526 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7233   0.3361   1.4415  80.7417  29.0566  22.3960
***** Episode 50631, Mean R = -10.1  Std R = 5.0  Min R = -25.9
PolicyLoss: 1.78
Policy_Entropy: 0.215
Policy_KL: 0.00582
Policy_SD: 0.53
Steps: 1.16e+04
TotalSteps: 1.85e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.00229


ValFun  Gradie

attitude |   -0.01    0.02   -0.09 |    1.21    0.69    1.87 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.01   -0.14 |    0.69    1.87 |   -1.47   -3.14 |    1.45    3.08
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.04   -0.01 |    0.03    0.03    0.03
w_rewards |   -0.00 |    0.00 |   -0.03 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.25 |    0.00 |    1.51
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.92   -0.99 |    0.99    0.99
cs_angles |  0.0007  0.0006 |  0.0726  0.0755 | -0.9202 -0.9915 |  0.9862  0.9933
optical_flow | -0.0000 -0.0002 |  0.0206  0.0208 | -0.8263 -1.1510 |  1.1322  1.1401
v_err    | -0.0109 |  0.0591 | -0.4532 |  0.0969
landing_rewards |    9.35 |    2.46 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0146   0.5449   2.1158  80.7417  29.0566  22.3960
Update Cnt = 1650    ET =   1360.0   Stats:  Mean, Std, Min, Max
r_f      |   11.62    1.46   -7.80 |  196.82  165.25  198.35 | -394.57 -389.49 -399.10 |  392.05  373.74  395.75
v_f      |   -0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.12   -0.09   -0.10 |    0.11    0.13    0.09
r_i      |   44.07  -12.71  -34.66 |  706.93  645.39  760.22 |-1336.44-1305.83-1362.81 | 1357.29 1297.20 1351.54
v_i      |   -0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.15 |    0.07 |    0.03 |    0.38
norm_vf  |    0.08 |    0.02 |    0.04 |    0.16
gs_f     |    1.59 |    4.60 |    0.01 |   66.34
thrust   |   -0.00   -0.00    0.01 |    0.67    0.68    0.67 |   -3.34   -3.38   -3.37 |    3.46    3.44    3.44
norm_thrust |    0.90 |    0.74 |    0.00 |    3.46
fuel     |    1.53 |    0.19 |    1.12 |    2.12
rewards  |  -10.12 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6927   0.3221   1.4866  80.7417  29.0566  22.3960
***** Episode 51468, Mean R = -10.5  Std R = 7.0  Min R = -31.3
PolicyLoss: 2.15
Policy_Entropy: 0.215
Policy_KL: 0.00723
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 1.88e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.00155


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0014   0.0055   5.5684   2.2087   2.1342
ADVA:  (18380,) (34856,) 0.5273123708974065
ADV1:  0.0 -0.00048254798757786747 0.008125195272465932 0.05207069379654877 -0.06218012371307874
ADVB:  (21022,) (34856,) 0.6031099380307551
ADV2:  0.16239241331521187 0.37054195744519836 0.5034273955899977 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5500   0.2677   1.0162  80.7417  29.0566  22.3960
***** Episode 51499, Mean R = -9.5  Std R = 3.7  Min R = -20.1
PolicyLoss: 1.91
Policy_Entropy: 0.215
Policy_KL: 0.00617
Policy_SD: 0.54
Steps: 1.16e+04
TotalSteps: 1.88e+07
VF_0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7456   0.3890   1.5041  80.7417  29.0566  22.3960
***** Episode 51716, Mean R = -11.5  Std R = 5.8  Min R = -29.2
PolicyLoss: 1.57
Policy_Entropy: 0.216
Policy_KL: 0.00609
Policy_SD: 0.542
Steps: 1.17e+04
TotalSteps: 1.89e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.959
VF_0_Loss : 0.00157


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0028   5.5684   2.2087   2.1342
ADVA:  (19879,) (35368,) 0.5620617507351278
ADV1:  0.0 -0.001288395075762076 0.008116242079675132 0.0467558111243363 -0.0831024819524476
ADVB:  (17403,) (35368,) 0.4920549649400588
ADV2:  0.0 0.2555632850434854 0.40951139763256955 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3889   0.1610   0.8918  80.7417  29.0566  22.3960
***** Episode 51747, Mean R = -9.0  Std R = 3.9  Min R = -22.0
PolicyLoss: 1.6
Policy_Entropy: 0.216
Policy_KL: 0.00692
Policy_SD: 0.534
Steps: 1.19e+04
TotalSteps: 1.89e+07
VF_0_ExplainedVarNew: 0.

ADVA:  (19357,) (35358,) 0.547457435375304
ADV1:  0.0 -0.0005194064176062739 0.008248668586566393 0.05406170529220633 -0.06270758115583266
ADVB:  (18876,) (35358,) 0.5338537247581877
ADV2:  0.06707451456727309 0.3734614172032261 0.5521356757445696 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4694   0.2215   0.9887  80.7417  29.0566  22.3960
***** Episode 51964, Mean R = -11.3  Std R = 4.9  Min R = -23.6
PolicyLoss: 2.18
Policy_Entropy: 0.215
Policy_KL: 0.00597
Policy_SD: 0.548
Steps: 1.2e+04
TotalSteps: 1.9e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.00144


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0013   5.5684   2.2087   2.1342
ADVA:  (21361,) (35152,) 0.6076752389622212
ADV1:  0.0 -0.0015584777962348822 0.008830667301983567 0.035908587428989835 -0.0825264401478793
ADVB:  (17000,) (35152,) 0.4836140191169777
ADV2:  0.0 0.25170929911141326 0.40551215279114694 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0005   0.0021   5.5684   2.2087   2.1342
ADVA:  (21538,) (35072,) 0.6141081204379562
ADV1:  0.00013306170349834754 -0.0010903569392788988 0.008728140077559756 0.03796236626629551 -0.06226638706193219
ADVB:  (19598,) (35072,) 0.5587933394160584
ADV2:  0.0824644221260393 0.26775566097997433 0.38023614691755764 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4510   0.1699   0.8094  80.7417  29.0566  22.3960
***** Episode 52212, Mean R = -10.7  Std R = 5.8  Min R = -31.3
PolicyLoss: 1.48
Policy_Entropy: 0.215
Policy_KL: 0.00648
Policy_SD: 0.545
Steps: 1.15e+04
TotalSteps: 1.9e+07
VF_0_ExplainedVarNew: 0.96
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 0.0162


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0047   0.0028   0.0102   5.5684   2.2087   2.1342
ADVA:  (22421,) (35104,) 0.6387021422060164
ADV1:  0.00142532060926019 0.00023568560615319734 0.009105413150563334 0.04434089199871369 -0.06226638706193219
ADVB:  

seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.98   -1.00 |    1.00    0.97
cs_angles |  0.0020  0.0020 |  0.0737  0.0742 | -0.9838 -0.9976 |  0.9996  0.9661
optical_flow |  0.0001 -0.0001 |  0.0218  0.0194 | -1.0671 -1.0352 |  1.1901  1.0758
v_err    | -0.0112 |  0.0595 | -0.4528 |  0.1037
landing_rewards |    9.32 |    2.51 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -15.33 |    4.34 |  -44.90 |   -8.39
steps    |     377 |      20 |     337 |     420
***** Episode 52460, Mean R = -11.2  Std R = 5.6  Min R = -24.9
PolicyLoss: 1.35
Policy_Entropy: 0.215
Policy_KL: 0.00602
Policy_SD: 0.551
Steps: 1.18e+04
TotalSteps: 1.91e+07
VF_0_ExplainedVarNew: 0.957
VF_0_ExplainedVarOld: 0.953
VF_0_Loss : 0.00142


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0010   0.0043   5.5684   2.2087   2.1342
ADVA:  (20951,) (35390,) 0.5920033907883583
ADV1:  0.0005647081410856394 -0.0003404368438351938 0.008235810926632389 0.06795

att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |   -0.04    0.01    0.20 |    1.13    0.66    1.79 |   -3.14   -1.57   -3.14 |    3.14    1.53    3.14
w        |   -0.00    0.00    0.00 |    0.01    0.00    0.00 |   -0.05   -0.06   -0.05 |    0.05    0.05    0.05
a_f      |    0.00    0.14 |    0.66    1.80 |   -1.42   -3.14 |    1.52    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.03   -0.02 |    0.03    0.03    0.03
w_rewards |   -0.00 |    0.00 |   -0.06 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.26 |    0.00 |    1.49
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.98   -0.99 |    0.96    0.96
cs_angles |  0.0027  0.0016 |  0.0735  0.0778 | -0.9797 -0.9919 |  0.9556  0.9605
optical_flow | -0.0000 -0.0001 |  0.0215  0.0182 | -0.9954 -0.9372 |  1.0819  0.8825
v_err    | -0.0112 |  0.0598 | -0.4525 |  0.0966
landing_rewards |    9.71

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4879   0.1561   0.7752  80.7417  29.0566  22.3960
Update Cnt = 1710    ET =   1284.9   Stats:  Mean, Std, Min, Max
r_f      |   16.65   24.38   -1.16 |  188.67  171.27  194.71 | -390.84 -376.44 -382.43 |  386.42  385.35  387.86
v_f      |   -0.00   -0.01   -0.00 |    0.05    0.04    0.05 |   -0.13   -0.10   -0.09 |    0.09    0.10    0.09
r_i      |   37.86   97.08   13.09 |  709.89  632.95  752.65 |-1349.89-1332.58-1338.35 | 1340.89 1350.09 1299.80
v_i      |   -0.00   -0.01   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.08    0.09
norm_rf  |    0.15 |    0.07 |    0.03 |    0.37
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs_f     |    1.31 |    3.91 |    0.01 |   65.92
thrust   |    0.00    0.00    0.00 |    0.66    0.67    0.68 |   -3.26   -3.30   -3.39 |    3.43    3.28    3.46
norm_thrust |    0.90 |    0.74 |    0.00 |    3.46
fuel     |    1.52 |    0.21 |    1.12 |    2.36
rewards  |   -9.98 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4045   0.6368   2.5610  80.7417  29.0566  22.3960
***** Episode 53328, Mean R = -9.2  Std R = 3.8  Min R = -21.5
PolicyLoss: 2.29
Policy_Entropy: 0.216
Policy_KL: 0.00929
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 1.95e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00242


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0051   5.5684   2.2087   2.1342
ADVA:  (19037,) (35130,) 0.5419015086820381
ADV1:  0.0 -0.00018808571156550107 0.006078489031319529 0.0652768535794494 -0.06359326058021186
ADVB:  (18426,) (35130,) 0.5245089666951324
ADV2:  0.05319535171171997 0.35279053211480565 0.5198063885782166 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3525   0.1653   0.7618  80.7417  29.0566  22.3960
***** Episode 53359, Mean R = -9.6  Std R = 5.4  Min R = -22.5
PolicyLoss: 2.06
Policy_Entropy: 0.217
Policy_KL: 0.00815
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 1.95e+07
VF_0_E

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7147   0.2939   1.2747  80.7417  29.0566  22.3960
***** Episode 53576, Mean R = -11.3  Std R = 5.2  Min R = -23.9
PolicyLoss: 1.97
Policy_Entropy: 0.217
Policy_KL: 0.00835
Policy_SD: 0.54
Steps: 1.16e+04
TotalSteps: 1.96e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.00139


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0009   0.0031   5.5684   2.2087   2.1342
ADVA:  (20623,) (34994,) 0.5893295993598903
ADV1:  0.0013833686627109966 0.0005379763726614826 0.009256389854658273 0.07542652645339315 -0.10006074295284317
ADVB:  (21545,) (34994,) 0.6156769731954049
ADV2:  0.22590597702167153 0.41433011409653187 0.515471257677793 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7390   0.2810   1.4324  80.7417  29.0566  22.3960
***** Episode 53607, Mean R = -10.6  Std R = 3.5  Min R = -18.9
PolicyLoss: 2.07
Policy_Entropy: 0.217
Policy_KL: 0.00627
Policy_SD: 0.54
Steps: 1.19e+04
TotalSteps

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7797   0.3271   1.5976  80.7417  29.0566  22.3960
***** Episode 53824, Mean R = -9.9  Std R = 5.9  Min R = -28.9
PolicyLoss: 2.6
Policy_Entropy: 0.217
Policy_KL: 0.0091
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 1.97e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00576


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0017   0.0067   5.5684   2.2087   2.1342
ADVA:  (19595,) (35311,) 0.5549262269547733
ADV1:  0.0 -0.000938296446725337 0.0071786271838132675 0.04286763137297511 -0.08932186699879441
ADVB:  (17639,) (35311,) 0.4995327235139192
ADV2:  0.0 0.2616780765324037 0.4189989928284902 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2627   0.0876   0.4297  80.7417  29.0566  22.3960
***** Episode 53855, Mean R = -11.2  Std R = 7.9  Min R = -32.3
PolicyLoss: 1.6
Policy_Entropy: 0.216
Policy_KL: 0.00771
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 1.97e+07
VF_0_ExplainedVarNew: 0.

ADVA:  (19621,) (35390,) 0.5544221531506075
ADV1:  0.0013520250631484437 0.0006804542260147209 0.008918869364700955 0.053424424485981864 -0.08608362364136435
ADVB:  (22982,) (35390,) 0.6493924837524725
ADV2:  0.27129739712771095 0.45736315195806965 0.5558126531329294 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6675   0.3105   1.2901  80.7417  29.0566  22.3960
***** Episode 54072, Mean R = -10.4  Std R = 4.9  Min R = -24.4
PolicyLoss: 2.18
Policy_Entropy: 0.218
Policy_KL: 0.00658
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 1.98e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.00299


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0045   5.5684   2.2087   2.1342
ADVA:  (19690,) (35578,) 0.5534318961155771
ADV1:  0.0010580650664412175 0.0010223680924574604 0.007433607692733844 0.06349067964086613 -0.08381487680633481
ADVB:  (20932,) (35578,) 0.5883411096745179
ADV2:  0.1846450077780693 0.4679994366477691 0.626093742769123 

optical_flow |  0.0000 -0.0002 |  0.0201  0.0190 | -0.9759 -1.0087 |  1.1786  0.9811
v_err    | -0.0110 |  0.0597 | -0.4515 |  0.1157
landing_rewards |    9.52 |    2.15 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -15.46 |    4.83 |  -35.75 |   -8.36
steps    |     381 |      20 |     334 |     417
***** Episode 54320, Mean R = -9.2  Std R = 5.3  Min R = -26.0
PolicyLoss: 1.76
Policy_Entropy: 0.218
Policy_KL: 0.00589
Policy_SD: 0.529
Steps: 1.17e+04
TotalSteps: 1.98e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.00173


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0015   5.5684   2.2087   2.1342
ADVA:  (20778,) (35376,) 0.5873473541383989
ADV1:  0.0008024562637003818 6.855401312693058e-07 0.00819071799893948 0.04220898684303262 -0.058473979484461924
ADVB:  (21813,) (35376,) 0.6166044776119403
ADV2:  0.19109986100649368 0.3625137821978563 0.4567757978931404 3.0 0.0
Policy  Gradients: 

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.06   -0.05 |    0.05    0.05    0.05
a_f      |    0.03    0.09 |    0.63    1.88 |   -1.46   -3.13 |    1.44    3.13
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.04   -0.02 |    0.04    0.02    0.03
w_rewards |   -0.00 |    0.00 |   -0.06 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.27 |    0.00 |    1.43
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.94   -0.94 |    0.97    1.00
cs_angles |  0.0021  0.0005 |  0.0776  0.0772 | -0.9430 -0.9409 |  0.9733  0.9957
optical_flow |  0.0000 -0.0001 |  0.0201  0.0201 | -1.1909 -1.0055 |  0.9258  1.4660
v_err    | -0.0113 |  0.0600 | -0.4522 |  0.0958
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -15.87 |    4.42 |  -33.69 |   -8.87
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3661   0.1510   0.7544  80.7417  29.0566  22.3960
Update Cnt = 1770    ET =   1219.3   Stats:  Mean, Std, Min, Max
r_f      |   -9.73  -10.23  -10.46 |  185.16  167.29  212.09 | -390.14 -374.34 -391.26 |  373.06  368.56  379.90
v_f      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.08    0.11    0.10
r_i      |  -61.68  -27.57  -38.47 |  683.44  635.22  793.84 |-1335.85-1256.35-1310.71 | 1269.37 1309.54 1304.25
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.14 |    0.07 |    0.01 |    0.40
norm_vf  |    0.08 |    0.01 |    0.03 |    0.12
gs_f     |    1.32 |    1.87 |    0.01 |   20.00
thrust   |    0.00    0.01    0.00 |    0.66    0.68    0.68 |   -3.21   -3.46   -3.39 |    3.42    3.46    3.43
norm_thrust |    0.91 |    0.73 |    0.00 |    3.46
fuel     |    1.51 |    0.19 |    1.06 |    2.30
rewards  |  -10.35 

ADVA:  (21197,) (35312,) 0.6002775260534663
ADV1:  0.00042331549743680925 -0.00023662979507250092 0.007915307805663156 0.07511155526724955 -0.10490395021052679
ADVB:  (19956,) (35312,) 0.5651336656094246
ADV2:  0.10609940129359224 0.3132348373464926 0.43786554700676455 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9062   0.3696   1.6308  80.7417  29.0566  22.3960
***** Episode 55188, Mean R = -11.0  Std R = 6.4  Min R = -25.9
PolicyLoss: 1.71
Policy_Entropy: 0.218
Policy_KL: 0.00831
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 2.02e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.961
VF_0_Loss : 0.00412


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0013   0.0052   5.5684   2.2087   2.1342
ADVA:  (21210,) (35427,) 0.5986959098992294
ADV1:  0.0010453453960526209 0.0002990848769726884 0.008306484520439111 0.07511155526724955 -0.10490395021052679
ADVB:  (21722,) (35427,) 0.6131481638298473
ADV2:  0.20208283578264408 0.3740379154135908 0.4836278042779

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0019   0.0072   5.5684   2.2087   2.1342
ADVA:  (19204,) (35199,) 0.5455836813545839
ADV1:  0.0 -0.0003394277145871636 0.008081920231268929 0.07225113320404758 -0.07965119608124227
ADVB:  (19946,) (35199,) 0.5666638256768658
ADV2:  0.1278163267888876 0.3822970658206703 0.5103708131784315 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7654   0.3014   1.5372  80.7417  29.0566  22.3960
***** Episode 55436, Mean R = -10.9  Std R = 6.7  Min R = -33.0
PolicyLoss: 2.08
Policy_Entropy: 0.218
Policy_KL: 0.00774
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 2.03e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.00654


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0027   5.5684   2.2087   2.1342
ADVA:  (19095,) (35048,) 0.5448242410408582
ADV1:  0.000872145612924563 0.00029094397013174365 0.00851634177334983 0.07225113320404758 -0.10828106394133508
ADVB:  (22167,) (35048,) 

***** Episode 55653, Mean R = -11.8  Std R = 4.2  Min R = -21.8
PolicyLoss: 2.16
Policy_Entropy: 0.216
Policy_KL: 0.0309
Policy_SD: 0.549
Steps: 1.18e+04
TotalSteps: 2.04e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.00307


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0021   5.5684   2.2087   2.1342
ADVA:  (21589,) (35226,) 0.6128711747005053
ADV1:  0.0013737812390479838 0.00026339969758963866 0.007988161026836362 0.052470482704384036 -0.06562105930728501
ADVB:  (22861,) (35226,) 0.6489808664054959
ADV2:  0.24278578603240591 0.3731783807643816 0.43219881116114583 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.9864   3.7243  15.4511  80.7417  29.0566  22.3960
***** Episode 55684, Mean R = -10.5  Std R = 5.0  Min R = -24.5
PolicyLoss: 1.77
Policy_Entropy: 0.218
Policy_KL: 0.0112
Policy_SD: 0.551
Steps: 1.17e+04
TotalSteps: 2.04e+07
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.00465


ValFun  Grad

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5165   0.2044   0.9854  80.7417  29.0566  22.3960
***** Episode 55901, Mean R = -10.7  Std R = 6.0  Min R = -23.5
PolicyLoss: 1.93
Policy_Entropy: 0.218
Policy_KL: 0.00842
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 2.04e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.00198


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0015   0.0055   5.5684   2.2087   2.1342
ADVA:  (18919,) (35240,) 0.536861520998865
ADV1:  0.0005959831051266708 0.0004039128080855635 0.0069610129428569305 0.04737143737973326 -0.060167150128631636
ADVB:  (20444,) (35240,) 0.5801362088535755
ADV2:  0.15545620101155078 0.4057786837195319 0.5529820805430418 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2522   0.0822   0.4398  80.7417  29.0566  22.3960
***** Episode 55932, Mean R = -9.4  Std R = 5.1  Min R = -27.5
PolicyLoss: 2.15
Policy_Entropy: 0.218
Policy_KL: 0.00798
Policy_SD: 0.539
Steps: 1.18e+04
TotalStep

seeker_angles |   -0.00    0.00 |    0.07    0.07 |   -0.99   -0.98 |    1.00    0.97
cs_angles | -0.0001  0.0013 |  0.0739  0.0730 | -0.9868 -0.9779 |  0.9974  0.9718
optical_flow | -0.0000 -0.0001 |  0.0186  0.0170 | -0.9862 -0.9660 |  0.9629  0.8434
v_err    | -0.0109 |  0.0595 | -0.4532 |  0.1046
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.03
tracking_rewards |  -15.29 |    4.24 |  -32.09 |   -8.02
steps    |     378 |      21 |     334 |     420
***** Episode 56180, Mean R = -11.0  Std R = 5.4  Min R = -25.2
PolicyLoss: 1.91
Policy_Entropy: 0.219
Policy_KL: 0.00701
Policy_SD: 0.546
Steps: 1.17e+04
TotalSteps: 2.06e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.00184


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0014   0.0049   5.5684   2.2087   2.1342
ADVA:  (18116,) (35240,) 0.5140749148694665
ADV1:  0.0007692570373560859 0.0005301018342691303 0.006494053469266096 0.036646

attitude |    0.06   -0.02    0.01 |    1.21    0.66    1.85 |   -3.14   -1.54   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.01    0.01 |    0.66    1.83 |   -1.31   -3.14 |    1.52    3.12
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.02 |    0.04    0.04    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.22 |    0.26 |    0.00 |    1.34
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.95   -0.99 |    0.99    0.99
cs_angles |  0.0006  0.0019 |  0.0693  0.0789 | -0.9543 -0.9857 |  0.9928  0.9851
optical_flow |  0.0000 -0.0001 |  0.0195  0.0190 | -1.0117 -1.0065 |  1.0366  0.9070
v_err    | -0.0110 |  0.0596 | -0.4529 |  0.1002
landing_rewards |    9.65 |    1.85 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2553   0.0932   0.4640  80.7417  29.0566  22.3960
Update Cnt = 1830    ET =   1178.4   Stats:  Mean, Std, Min, Max
r_f      |  -13.86   -3.15   -6.49 |  185.13  164.68  211.92 | -391.93 -360.37 -391.71 |  391.50  374.17  396.28
v_f      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.11 |    0.10    0.09    0.09
r_i      |  -46.01  -14.11  -33.34 |  662.80  666.78  778.79 |-1333.79-1224.22-1295.85 | 1309.56 1312.71 1320.67
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.10    0.09    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.36
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.30 |    1.65 |    0.01 |   11.34
thrust   |    0.00    0.01   -0.00 |    0.66    0.68    0.67 |   -3.17   -3.37   -3.38 |    3.33    3.37    3.46
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.50 |    0.18 |    1.08 |    2.02
rewards  |  -10.20 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.9949   3.1243  13.2943  80.7417  29.0566  22.3960
***** Episode 57048, Mean R = -9.3  Std R = 4.3  Min R = -21.7
PolicyLoss: 2.27
Policy_Entropy: 0.219
Policy_KL: 0.01
Policy_SD: 0.539
Steps: 1.19e+04
TotalSteps: 2.09e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.00126


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0015   0.0054   5.5684   2.2087   2.1342
ADVA:  (19754,) (35172,) 0.5616399408620494
ADV1:  0.0 -8.261118723341651e-05 0.008403359477389828 0.07258556485514978 -0.11857860799041708
ADVB:  (18841,) (35172,) 0.5356817923348118
ADV2:  0.07567685552183065 0.3981254811400949 0.5655362240406305 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9096   0.9810   4.3211  80.7417  29.0566  22.3960
***** Episode 57079, Mean R = -9.3  Std R = 5.4  Min R = -28.8
PolicyLoss: 2.27
Policy_Entropy: 0.219
Policy_KL: 0.00904
Policy_SD: 0.54
Steps: 1.16e+04
TotalSteps: 2.09e+07
VF_0_Expla

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5531   0.8030   3.4399  80.7417  29.0566  22.3960
***** Episode 57296, Mean R = -9.4  Std R = 4.4  Min R = -25.2
PolicyLoss: 2.49
Policy_Entropy: 0.22
Policy_KL: 0.00692
Policy_SD: 0.529
Steps: 1.17e+04
TotalSteps: 2.1e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000285


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0024   5.5684   2.2087   2.1342
ADVA:  (18477,) (35042,) 0.5272815478568574
ADV1:  0.0006013508758946776 0.00034118925749024103 0.006246125988314444 0.06687222874957127 -0.08765575366382522
ADVB:  (20764,) (35042,) 0.5925460875520804
ADV2:  0.20219213402019873 0.42542291296908363 0.5432603504363539 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3806   0.1230   0.6902  80.7417  29.0566  22.3960
***** Episode 57327, Mean R = -10.3  Std R = 5.6  Min R = -22.1
PolicyLoss: 2.19
Policy_Entropy: 0.22
Policy_KL: 0.00623
Policy_SD: 0.536
Steps: 1.16e+04
TotalStep

ADVA:  (19273,) (35316,) 0.5457299807452712
ADV1:  0.0 -0.0006230578837898548 0.006406156895562673 0.07376220878068007 -0.05297569430746468
ADVB:  (17281,) (35316,) 0.48932495186317815
ADV2:  0.0 0.3264509922783515 0.5272524006129858 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6423   0.2770   1.0518  80.7417  29.0566  22.3960
***** Episode 57544, Mean R = -10.1  Std R = 5.7  Min R = -22.3
PolicyLoss: 2.03
Policy_Entropy: 0.22
Policy_KL: 0.00688
Policy_SD: 0.532
Steps: 1.16e+04
TotalSteps: 2.11e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.000375


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0042   5.5684   2.2087   2.1342
ADVA:  (20284,) (35203,) 0.5762008919694345
ADV1:  0.0007409397180167792 0.0003494231086914457 0.006859465410688543 0.04061551297681548 -0.05337137025456673
ADVB:  (20592,) (35203,) 0.5849501462943499
ADV2:  0.16020654304473478 0.3980662357889225 0.5323779943184869 3.0 0.0
Policy  Gradients: u/sd/M

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0009   0.0038   5.5684   2.2087   2.1342
ADVA:  (18567,) (34941,) 0.5313814716235941
ADV1:  -2.1249674310953523e-05 -0.0007228549228768806 0.005854872431295107 0.040402863344071616 -0.05134601964750747
ADVB:  (15875,) (34941,) 0.454337311467903
ADV2:  0.0 0.3382443855478079 0.5578532449071243 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0083   0.4043   1.6884  80.7417  29.0566  22.3960
***** Episode 57792, Mean R = -10.0  Std R = 4.9  Min R = -27.7
PolicyLoss: 2.27
Policy_Entropy: 0.221
Policy_KL: 0.00932
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 2.12e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00128


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0020   5.5684   2.2087   2.1342
ADVA:  (18272,) (35204,) 0.5190319281899785
ADV1:  0.0 -0.00046521256884989834 0.005896988290481379 0.05099414397462759 -0.05503777460409137
ADVB:  (17936,) (35204,) 0.509487558

seeker_angles |   -0.00   -0.00 |    0.08    0.08 |   -1.00   -0.99 |    0.99    0.98
cs_angles | -0.0007 -0.0009 |  0.0760  0.0779 | -0.9955 -0.9900 |  0.9865  0.9772
optical_flow |  0.0001  0.0001 |  0.0204  0.0187 | -1.1105 -0.9845 |  0.9254  1.1204
v_err    | -0.0113 |  0.0601 | -0.4548 |  0.0962
landing_rewards |    9.68 |    1.77 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -15.58 |    4.80 |  -37.33 |   -8.35
steps    |     378 |      20 |     334 |     418
***** Episode 58040, Mean R = -9.7  Std R = 5.6  Min R = -26.6
PolicyLoss: 2.4
Policy_Entropy: 0.221
Policy_KL: 0.00542
Policy_SD: 0.54
Steps: 1.18e+04
TotalSteps: 2.13e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00294


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0039   0.0025   0.0094   5.5684   2.2087   2.1342
ADVA:  (17019,) (35164,) 0.4839893072460471
ADV1:  0.00016612182517661538 -0.0002575250707490441 0.007450514623675621 0.0650194

thrust   |   -0.00   -0.00    0.00 |    0.67    0.66    0.68 |   -3.44   -3.37   -3.46 |    3.39    3.43    3.43
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.52 |    0.20 |    1.04 |    2.14
rewards  |  -10.33 |    5.27 |  -35.88 |   -0.88
fuel_rewards |   -4.35 |    0.58 |   -6.15 |   -3.00
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    3.02 |   13.45 |    0.00 |  428.91
norm_af  |    1.75 |    0.85 |    0.11 |    3.25
norm_wf  |    0.02 |    0.01 |    0.00 |    0.05
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |   -0.15    0.01   -0.11 |    1.16    0.69    1.84 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.01   -0.10 |    0.69

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7645   0.3553   1.5522  80.7417  29.0566  22.3960
***** Episode 58629, Mean R = -10.2  Std R = 6.1  Min R = -33.4
PolicyLoss: 1.95
Policy_Entropy: 0.22
Policy_KL: 0.00652
Policy_SD: 0.533
Steps: 1.18e+04
TotalSteps: 2.15e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.000829


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0018   0.0067   5.5684   2.2087   2.1342
ADVA:  (18673,) (35223,) 0.5301365584987082
ADV1:  0.0013161228894846902 0.0008659523533937433 0.0076983940199752176 0.059408369837516045 -0.10211275004155707
ADVB:  (23378,) (35223,) 0.6637140504783806
ADV2:  0.3118178175505841 0.48005504743047517 0.5494816314896704 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.9259   2.4061   9.2146  80.7417  29.0566  22.3960
Update Cnt = 1890    ET =   1348.8   Stats:  Mean, Std, Min, Max
r_f      |  -13.39   16.85   -8.49 |  194.99  170.72  193.45 | -395.97 -365.89 -377.27 |  392.57

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4514   0.1667   0.8223  80.7417  29.0566  22.3960
***** Episode 58877, Mean R = -10.8  Std R = 6.6  Min R = -29.1
PolicyLoss: 1.52
Policy_Entropy: 0.219
Policy_KL: 0.00723
Policy_SD: 0.531
Steps: 1.17e+04
TotalSteps: 2.16e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.961
VF_0_Loss : 0.000461


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0041   0.0025   0.0089   5.5684   2.2087   2.1342
ADVA:  (20229,) (35067,) 0.5768671400461973
ADV1:  0.0012547769748224337 0.0006970583588609098 0.007830558584239809 0.040847480282708015 -0.09512948292223888
ADVB:  (22352,) (35067,) 0.6374083896540907
ADV2:  0.2298440367297993 0.4174341142830613 0.5169607794228986 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6727   0.2676   1.2859  80.7417  29.0566  22.3960
***** Episode 58908, Mean R = -9.4  Std R = 4.1  Min R = -24.0
PolicyLoss: 2
Policy_Entropy: 0.221
Policy_KL: 0.00628
Policy_SD: 0.53
Steps: 1.16e+04
TotalSteps: 

ADVA:  (19844,) (34973,) 0.567409144196952
ADV1:  0.0008716377068475091 0.0003545994359599411 0.00687271830667946 0.04211902933699116 -0.10193769582715206
ADVB:  (21777,) (34973,) 0.622680353415492
ADV2:  0.21553657306749663 0.3844165307729439 0.47472946443324837 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8740   0.4509   1.9287  80.7417  29.0566  22.3960
***** Episode 59125, Mean R = -8.9  Std R = 3.4  Min R = -17.4
PolicyLoss: 1.88
Policy_Entropy: 0.221
Policy_KL: 0.00758
Policy_SD: 0.539
Steps: 1.15e+04
TotalSteps: 2.17e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00328


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0012   0.0046   5.5684   2.2087   2.1342
ADVA:  (20678,) (34981,) 0.5911208941997084
ADV1:  0.0006169065040280072 -0.00010054669047821606 0.007549361201809448 0.04854776327448712 -0.06161384333138803
ADVB:  (20857,) (34981,) 0.5962379577484921
ADV2:  0.16634222752542502 0.34515790241471267 0.4496791937697146 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0029   5.5684   2.2087   2.1342
ADVA:  (20870,) (34951,) 0.5971216846442162
ADV1:  0.0012084026668520865 0.0005518809565694976 0.007807710476939554 0.042082908464229435 -0.1391106187656136
ADVB:  (21257,) (34951,) 0.6081943292037424
ADV2:  0.1861957794386635 0.37619655941317864 0.4869757897644685 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2552   0.6523   2.7934  80.7417  29.0566  22.3960
***** Episode 59373, Mean R = -11.0  Std R = 6.2  Min R = -32.3
PolicyLoss: 1.89
Policy_Entropy: 0.221
Policy_KL: 0.00605
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 2.18e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.0124


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0014   0.0052   5.5684   2.2087   2.1342
ADVA:  (19039,) (34958,) 0.5446249785456834
ADV1:  0.0011397279668049088 0.001006969166967005 0.007170816632707616 0.08498410555023955 -0.1391106187656136
ADVB:  (2

***** Episode 59590, Mean R = -9.8  Std R = 4.6  Min R = -22.0
PolicyLoss: 2.13
Policy_Entropy: 0.221
Policy_KL: 0.00671
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 2.18e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00533


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0013   5.5684   2.2087   2.1342
ADVA:  (18319,) (34966,) 0.5239089401132528
ADV1:  0.0 -0.00015309276498017764 0.00565188517252802 0.038057728149614695 -0.057595816332224326
ADVB:  (18310,) (34966,) 0.5236515472172968
ADV2:  0.04832360003445532 0.37431080870008676 0.5543499432664134 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2428   0.0727   0.3984  80.7417  29.0566  22.3960
***** Episode 59621, Mean R = -9.6  Std R = 4.8  Min R = -23.8
PolicyLoss: 2.16
Policy_Entropy: 0.221
Policy_KL: 0.0092
Policy_SD: 0.54
Steps: 1.16e+04
TotalSteps: 2.19e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00353


ValFun  Gradients: u/sd/Max/C M

attitude |    0.01    0.04    0.10 |    1.08    0.65    1.75 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.04    0.10 |    0.64    1.74 |   -1.40   -3.12 |    1.49    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.04   -0.02 |    0.03    0.03    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.39
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.97   -0.99 |    0.98    0.99
cs_angles |  0.0022  0.0016 |  0.0737  0.0747 | -0.9670 -0.9930 |  0.9817  0.9945
optical_flow |  0.0000  0.0000 |  0.0207  0.0188 | -0.9651 -1.0218 |  1.5920  1.0380
v_err    | -0.0114 |  0.0605 | -0.4528 |  0.0988
landing_rewards |    9.65 |    1.85 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7043   0.3695   1.5714  80.7417  32.4863  22.3960
Update Cnt = 1940    ET =   1388.8   Stats:  Mean, Std, Min, Max
r_f      |    1.59   -2.05    6.95 |  185.81  159.51  207.28 | -391.21 -391.56 -384.60 |  395.93  364.20  396.87
v_f      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.10    0.09
r_i      |   10.46    0.18   40.17 |  684.58  638.21  775.86 |-1346.52-1245.23-1318.28 | 1342.52 1308.32 1350.39
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.09    0.10
norm_rf  |    0.15 |    0.07 |    0.03 |    0.46
norm_vf  |    0.07 |    0.01 |    0.03 |    0.12
gs_f     |    1.22 |    1.44 |    0.00 |   13.23
thrust   |    0.00   -0.00   -0.00 |    0.67    0.66    0.66 |   -3.39   -3.32   -3.41 |    3.40    3.40    3.46
norm_thrust |    0.89 |    0.73 |    0.00 |    3.46
fuel     |    1.49 |    0.20 |    1.01 |    2.72
rewards  |  -10.26 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2911   0.1207   0.6224  80.7417  32.4863  22.3960
***** Episode 60458, Mean R = -11.6  Std R = 5.2  Min R = -26.9
PolicyLoss: 1.9
Policy_Entropy: 0.222
Policy_KL: 0.00577
Policy_SD: 0.538
Steps: 1.17e+04
TotalSteps: 2.22e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00106


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0015   5.5684   2.2087   2.1342
ADVA:  (17875,) (34930,) 0.5117377612367592
ADV1:  0.000530040702966434 0.0005208520000276881 0.005989110008131103 0.05383389571926944 -0.059295187360404455
ADVB:  (20738,) (34930,) 0.5937016890924707
ADV2:  0.2209405367758211 0.48090285898151963 0.6207778946036517 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6288   0.2172   1.0890  80.7417  32.4863  22.3960
***** Episode 60489, Mean R = -8.6  Std R = 3.7  Min R = -22.1
PolicyLoss: 2.47
Policy_Entropy: 0.222
Policy_KL: 0.00534
Policy_SD: 0.539
Steps: 1.15e+04
TotalSteps

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  10.9042   9.6534  35.1941  80.7417  32.4863  22.3960
***** Episode 60706, Mean R = -8.8  Std R = 3.7  Min R = -16.6
PolicyLoss: 2.51
Policy_Entropy: 0.222
Policy_KL: 0.0113
Policy_SD: 0.526
Steps: 1.18e+04
TotalSteps: 2.23e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00363


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0019   0.0070   5.5684   2.2087   2.1342
ADVA:  (18522,) (35587,) 0.52047095849608
ADV1:  0.0001841519193667776 0.00013326783576777363 0.00664915809225067 0.04639268774246308 -0.09267835977652877
ADVB:  (20064,) (35587,) 0.5638013881473571
ADV2:  0.13938435168021235 0.4471420896038413 0.6028581471932436 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.8984   4.3115  13.8891  80.7417  32.4863  22.3960
***** Episode 60737, Mean R = -10.4  Std R = 5.6  Min R = -26.7
PolicyLoss: 2.41
Policy_Entropy: 0.222
Policy_KL: 0.00909
Policy_SD: 0.523
Steps: 1.19e+04
TotalSteps: 

ADVA:  (20204,) (35408,) 0.5706055128784455
ADV1:  0.0006897183807158579 0.000283846758902739 0.00712048321441614 0.03564769331205203 -0.10062417641408666
ADVB:  (20984,) (35408,) 0.5926344328965205
ADV2:  0.17330409250820936 0.37642236267856244 0.4753469170503516 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6426   0.2428   1.0247  80.7417  32.4863  22.3960
***** Episode 60954, Mean R = -8.9  Std R = 3.8  Min R = -17.7
PolicyLoss: 1.93
Policy_Entropy: 0.223
Policy_KL: 0.00733
Policy_SD: 0.526
Steps: 1.18e+04
TotalSteps: 2.24e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00181


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0038   5.5684   2.2087   2.1342
ADVA:  (20044,) (35456,) 0.5653203971119134
ADV1:  0.0 -0.0005950885366125607 0.007045024456832695 0.03564769331205203 -0.10062417641408666
ADVB:  (18402,) (35456,) 0.5190094765342961
ADV2:  0.03192819412050387 0.29539036763335347 0.4443144103970407 3.0 0.0
Policy  Gra

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0003   0.0010   5.5684   2.2087   2.1342
ADVA:  (20070,) (35520,) 0.5650337837837838
ADV1:  0.0 -0.0005269613284626118 0.006380319956240431 0.03570414173578729 -0.10798855711748256
ADVB:  (18931,) (35520,) 0.5329673423423423
ADV2:  0.06487260403913708 0.31465272065104877 0.4669750839951048 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4905   0.1884   0.9178  80.7417  32.4863  22.3960
***** Episode 61202, Mean R = -9.9  Std R = 5.0  Min R = -20.4
PolicyLoss: 1.79
Policy_Entropy: 0.222
Policy_KL: 0.00602
Policy_SD: 0.522
Steps: 1.17e+04
TotalSteps: 2.25e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.000939


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0009   5.5684   2.2087   2.1342
ADVA:  (21757,) (35203,) 0.6180439167116439
ADV1:  0.0008018658090596775 2.2306291887361744e-06 0.007365565894568097 0.04485347157244146 -0.10798855711748256
ADVB:  (20927,) (3520

cs_angles |  0.0019  0.0030 |  0.0736  0.0733 | -0.9802 -0.9807 |  0.9788  0.9918
optical_flow |  0.0001 -0.0000 |  0.0221  0.0200 | -1.0379 -0.9584 |  1.2201  1.2074
v_err    | -0.0112 |  0.0599 | -0.4535 |  0.1000
landing_rewards |    9.74 |    1.59 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.07 |    0.01
tracking_rewards |  -14.85 |    3.82 |  -33.26 |   -8.91
steps    |     380 |      20 |     338 |     417
***** Episode 61450, Mean R = -10.1  Std R = 5.4  Min R = -28.5
PolicyLoss: 2.13
Policy_Entropy: 0.223
Policy_KL: 0.00625
Policy_SD: 0.529
Steps: 1.19e+04
TotalSteps: 2.25e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.0015


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0033   5.5684   2.2087   2.1342
ADVA:  (19986,) (35581,) 0.561704280374357
ADV1:  0.0 -0.0002644298171247597 0.005921994065631284 0.03515928054447093 -0.04956893842289403
ADVB:  (18171,) (35581,) 0.5106939096708918
ADV2:  0.020098042428191357 0

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.03   -0.07 |    0.65    1.75 |   -1.48   -3.12 |    1.40    3.10
w_f      |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.04   -0.02 |    0.03    0.04    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.40
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.94 |    0.99    0.98
cs_angles |  0.0036  0.0033 |  0.0761  0.0761 | -0.9803 -0.9422 |  0.9863  0.9803
optical_flow |  0.0001 -0.0001 |  0.0197  0.0199 | -0.9734 -1.1656 |  1.1055  1.1482
v_err    | -0.0110 |  0.0599 | -0.4525 |  0.1024
landing_rewards |    9.61 |    1.93 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.08 |    0.02
tracking_rewards |  -15.04 |    4.35 |  -34.43 |   -8.29
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2810   0.1172   0.5223  96.9288  38.5936  26.9699
Update Cnt = 2000    ET =   1455.0   Stats:  Mean, Std, Min, Max
r_f      |   -3.10   -7.82   -6.68 |  189.69  168.25  201.37 | -388.61 -374.68 -386.40 |  390.53  387.45  384.32
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.11 |    0.09    0.12    0.10
r_i      |  -17.98  -12.41  -31.40 |  696.87  639.31  782.78 |-1339.84-1287.77-1317.21 | 1339.03 1332.01 1351.66
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.09    0.10
norm_rf  |    0.15 |    0.07 |    0.02 |    0.42
norm_vf  |    0.07 |    0.01 |    0.03 |    0.12
gs_f     |    1.69 |    4.78 |    0.00 |   61.02
thrust   |   -0.00    0.00    0.00 |    0.67    0.66    0.66 |   -3.26   -3.45   -3.45 |    3.42    2.98    3.44
norm_thrust |    0.88 |    0.73 |    0.00 |    3.46
fuel     |    1.49 |    0.17 |    1.04 |    2.05
rewards  |   -9.76 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2958   0.1091   0.5717  96.9288  38.5936  26.9699
***** Episode 62318, Mean R = -10.7  Std R = 4.6  Min R = -23.5
PolicyLoss: 2.15
Policy_Entropy: 0.224
Policy_KL: 0.00519
Policy_SD: 0.533
Steps: 1.18e+04
TotalSteps: 2.29e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.000273


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0024   5.5684   2.2087   2.1342
ADVA:  (21346,) (35356,) 0.6037447675076366
ADV1:  0.0005642900360287324 -6.827294150281299e-06 0.006439943356074774 0.05714842729201364 -0.06433511467919623
ADVB:  (19668,) (35356,) 0.5562846475845684
ADV2:  0.10055184350109408 0.31737497062240566 0.4422732929153699 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6440   0.2837   1.3618  96.9288  38.5936  26.9699
***** Episode 62349, Mean R = -8.7  Std R = 4.3  Min R = -21.6
PolicyLoss: 1.72
Policy_Entropy: 0.224
Policy_KL: 0.00578
Policy_SD: 0.533
Steps: 1.17e+04
TotalS

ADVA:  (21662,) (34946,) 0.6198706575859898
ADV1:  0.0002487846656332735 -0.0005291992959591058 0.0077287875996238445 0.05053887922947381 -0.11103417264319654
ADVB:  (18568,) (34946,) 0.5313340582613174
ADV2:  0.049281047490764285 0.2826964329851173 0.42220573221602997 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4967   0.2513   0.9455  96.9288  38.5936  26.9699
***** Episode 62566, Mean R = -9.8  Std R = 5.2  Min R = -22.5
PolicyLoss: 1.61
Policy_Entropy: 0.224
Policy_KL: 0.00518
Policy_SD: 0.528
Steps: 1.17e+04
TotalSteps: 2.3e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.000654


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0021   0.0080   5.5684   2.2087   2.1342
ADVA:  (21725,) (34976,) 0.6211402104300091
ADV1:  0.000856841225444444 8.972675037204975e-05 0.008013111209425033 0.05053887922947381 -0.11103417264319654
ADVB:  (20383,) (34976,) 0.5827710430009149
ADV2:  0.13860975939491116 0.3525332623101834 0.466548352930840

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0018   5.5684   2.2087   2.1342
ADVA:  (17709,) (35097,) 0.5045730404308061
ADV1:  0.0 -0.0006321180381888036 0.005389082679100667 0.04206581959424646 -0.07186611443129587
ADVB:  (17706,) (35097,) 0.504487563039576
ADV2:  0.009620102507000364 0.3467475762046393 0.5208930784138841 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5439   0.2420   1.0190  96.9288  38.5936  26.9699
***** Episode 62814, Mean R = -9.6  Std R = 4.1  Min R = -22.9
PolicyLoss: 2.06
Policy_Entropy: 0.224
Policy_KL: 0.00712
Policy_SD: 0.536
Steps: 1.19e+04
TotalSteps: 2.31e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000493


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0014   5.5684   2.2087   2.1342
ADVA:  (19382,) (35210,) 0.5504686168702073
ADV1:  0.0 -0.0008101488708141063 0.005722538364374016 0.04206581959424646 -0.04711835813116902
ADVB:  (16810,) (35210,) 0.47742118716273

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0028   5.5684   2.2087   2.1342
ADVA:  (18542,) (35039,) 0.5291817688860984
ADV1:  0.0007579579942608794 0.0005180218894182228 0.0066686397983183035 0.04775176713157936 -0.048527822480690735
ADVB:  (22278,) (35039,) 0.6358058163760382
ADV2:  0.24923470630087566 0.4335830700876449 0.5294364689791552 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3795   0.1605   0.7105  96.9288  38.5936  26.9699
***** Episode 63062, Mean R = -9.2  Std R = 4.2  Min R = -19.1
PolicyLoss: 2.06
Policy_Entropy: 0.225
Policy_KL: 0.00607
Policy_SD: 0.528
Steps: 1.18e+04
TotalSteps: 2.32e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00045


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0033   5.5684   2.2087   2.1342
ADVA:  (18228,) (35208,) 0.5177232447171097
ADV1:  0.00021262617110640383 0.00022750148865203576 0.005933963162017066 0.0478058629378767 -0.054321938919322466
ADVB

cs_angles |  0.0016  0.0025 |  0.0720  0.0739 | -0.9902 -0.9832 |  0.9496  0.9680
optical_flow | -0.0000  0.0000 |  0.0197  0.0195 | -1.1244 -0.9722 |  0.9650  1.0616
v_err    | -0.0113 |  0.0601 | -0.4533 |  0.1078
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.02
tracking_rewards |  -14.67 |    4.04 |  -31.27 |   -8.17
steps    |     377 |      21 |     335 |     419
***** Episode 63310, Mean R = -9.4  Std R = 5.0  Min R = -23.3
PolicyLoss: 1.67
Policy_Entropy: 0.225
Policy_KL: 0.0084
Policy_SD: 0.528
Steps: 1.16e+04
TotalSteps: 2.32e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00137


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0016   0.0062   5.5684   2.2087   2.1342
ADVA:  (19644,) (34953,) 0.5620118444768689
ADV1:  0.0007935599043281307 0.000470431766146553 0.007118100561082961 0.058201083233296924 -0.08031655996451548
ADVB:  (21638,) (34953,) 0.6190598804108374
ADV2:  0.21

w        |    0.00    0.00   -0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.06    0.05 |    0.63    1.90 |   -1.48   -3.14 |    1.40    3.13
w_f      |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.02 |    0.03    0.03    0.03
w_rewards |   -0.00 |    0.00 |   -0.01 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.68
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.92   -1.00 |    0.99    1.00
cs_angles |  0.0035  0.0031 |  0.0724  0.0770 | -0.9167 -0.9976 |  0.9909  0.9952
optical_flow |  0.0001  0.0000 |  0.0192  0.0188 | -0.9759 -1.1188 |  1.0051  1.0365
v_err    | -0.0113 |  0.0604 | -0.4537 |  0.1101
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -15.17 |    4.21 |  -31.96 |   -8.12
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4140   0.1711   0.7731  96.9288  38.5936  26.9699
Update Cnt = 2060    ET =   1511.8   Stats:  Mean, Std, Min, Max
r_f      |  -16.66   14.94    2.69 |  186.68  157.94  203.45 | -393.03 -386.58 -387.74 |  388.84  362.86  386.79
v_f      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.09    0.10
r_i      |  -31.35   30.23    4.52 |  695.02  607.82  784.79 |-1298.77-1228.48-1257.86 | 1303.31 1318.28 1289.94
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.15 |    0.06 |    0.02 |    0.35
norm_vf  |    0.08 |    0.02 |    0.03 |    0.12
gs_f     |    1.23 |    1.35 |    0.01 |    9.68
thrust   |   -0.00   -0.00   -0.00 |    0.66    0.67    0.66 |   -3.37   -3.44   -3.45 |    3.45    3.43    3.45
norm_thrust |    0.89 |    0.73 |    0.00 |    3.46
fuel     |    1.48 |    0.20 |    1.03 |    2.64
rewards  |   -9.86 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2888   0.1171   0.5564  96.9288  38.5936  26.9699
***** Episode 64178, Mean R = -10.7  Std R = 6.0  Min R = -33.8
PolicyLoss: 2.07
Policy_Entropy: 0.225
Policy_KL: 0.00592
Policy_SD: 0.543
Steps: 1.15e+04
TotalSteps: 2.36e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.000707


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0015   0.0054   5.5684   2.2087   2.1342
ADVA:  (21200,) (34904,) 0.6073802429520971
ADV1:  5.227046001907501e-05 -0.0003438989903374096 0.007496389491481179 0.05593352747986702 -0.07253082758598023
ADVB:  (17780,) (34904,) 0.5093972037588815
ADV2:  0.014356437979483445 0.28053013163313395 0.43209074819074883 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4239   0.1567   0.7236  96.9288  38.5936  26.9699
***** Episode 64209, Mean R = -10.7  Std R = 5.7  Min R = -21.8
PolicyLoss: 1.64
Policy_Entropy: 0.225
Policy_KL: 0.00876
Policy_SD: 0.541
Steps: 1.18e+04
Tot

ADVA:  (21865,) (35210,) 0.6209883555808009
ADV1:  0.0007624157598217279 -0.00022201517285879642 0.008418798566677627 0.06232397861307326 -0.07884701981220005
ADVB:  (20306,) (35210,) 0.5767111616018177
ADV2:  0.11849192534627662 0.2987404875315989 0.409333630851787 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3451   0.1460   0.7013  96.9288  38.5936  26.9699
***** Episode 64426, Mean R = -8.8  Std R = 4.1  Min R = -18.4
PolicyLoss: 1.55
Policy_Entropy: 0.226
Policy_KL: 0.0077
Policy_SD: 0.531
Steps: 1.18e+04
TotalSteps: 2.37e+07
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.000474


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0016   5.5684   2.2087   2.1342
ADVA:  (20513,) (35469,) 0.5783360117285516
ADV1:  0.0009454999428456113 0.00018626744825424633 0.007985639950012228 0.06232397861307326 -0.07884701981220005
ADVB:  (22481,) (35469,) 0.6338210831994135
ADV2:  0.2130480730579451 0.3745304562320888 0.46996920810755194

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0014   5.5684   2.2087   2.1342
ADVA:  (20810,) (35468,) 0.586726062929965
ADV1:  0.00014952566257065564 -0.00041666535801434596 0.007376185364371808 0.07350181506628384 -0.07069806473081869
ADVB:  (19353,) (35468,) 0.5456467801962332
ADV2:  0.080548454544112 0.30162138122864507 0.43136421723308577 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7553   0.3997   1.5683  96.9288  38.5936  26.9699
***** Episode 64674, Mean R = -9.2  Std R = 4.3  Min R = -22.7
PolicyLoss: 1.66
Policy_Entropy: 0.226
Policy_KL: 0.00695
Policy_SD: 0.527
Steps: 1.19e+04
TotalSteps: 2.38e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.00067


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0009   0.0034   5.5684   2.2087   2.1342
ADVA:  (19407,) (35332,) 0.5492754443563908
ADV1:  0.001428125918982252 0.0006094154623935924 0.0074693167537778415 0.03684734459952205 -0.0995766456612609
ADVB: 

***** Episode 64891, Mean R = -11.0  Std R = 4.5  Min R = -20.7
PolicyLoss: 1.86
Policy_Entropy: 0.225
Policy_KL: 0.00775
Policy_SD: 0.54
Steps: 1.19e+04
TotalSteps: 2.38e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.00113


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0011   0.0040   5.5684   2.2087   2.1342
ADVA:  (23170,) (35538,) 0.6519781642185829
ADV1:  0.0007291511507402258 -0.00037126963811711245 0.009180277549650239 0.061510392874800146 -0.08494512591906983
ADVB:  (19798,) (35538,) 0.5570938150711914
ADV2:  0.08219016410700906 0.28357727172417413 0.41099023398166984 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4746   0.1696   0.8241  96.9288  38.5936  26.9699
***** Episode 64922, Mean R = -9.4  Std R = 5.1  Min R = -22.1
PolicyLoss: 1.53
Policy_Entropy: 0.226
Policy_KL: 0.00644
Policy_SD: 0.532
Steps: 1.18e+04
TotalSteps: 2.39e+07
VF_0_ExplainedVarNew: 0.958
VF_0_ExplainedVarOld: 0.954
VF_0_Loss : 0.00108


ValFun  G

w_f      |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.03   -0.04   -0.02 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.49
seeker_angles |    0.00    0.00 |    0.08    0.07 |   -0.97   -1.00 |    0.98    0.99
cs_angles |  0.0032  0.0014 |  0.0755  0.0726 | -0.9667 -0.9974 |  0.9758  0.9876
optical_flow | -0.0000 -0.0000 |  0.0201  0.0186 | -1.0747 -0.9370 |  1.0510  0.9163
v_err    | -0.0114 |  0.0600 | -0.4528 |  0.0990
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -15.01 |    4.12 |  -33.38 |   -8.67
steps    |     379 |      20 |     339 |     420
***** Episode 65170, Mean R = -9.4  Std R = 5.2  Min R = -26.8
PolicyLoss: 2.33
Policy_Entropy: 0.226
Policy_KL: 0.0097
Policy_SD: 0.529
Steps: 1.16e+04
TotalSteps: 2.

Update Cnt = 2110    ET =   1411.7   Stats:  Mean, Std, Min, Max
r_f      |   -7.86    3.45    2.04 |  186.07  165.39  202.91 | -392.74 -363.45 -397.35 |  390.46  376.32  391.31
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.09    0.09
r_i      |  -11.19   -4.19    1.92 |  696.19  664.55  748.93 |-1307.02-1311.09-1274.90 | 1292.55 1267.90 1353.71
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.10    0.09    0.10
norm_rf  |    0.14 |    0.07 |    0.03 |    0.52
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs_f     |    1.25 |    1.92 |    0.00 |   23.86
thrust   |   -0.00    0.00   -0.00 |    0.67    0.67    0.66 |   -3.42   -3.29   -3.36 |    3.42    3.46    3.45
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.49 |    0.21 |    1.08 |    2.37
rewards  |   -9.49 |    5.14 |  -42.19 |   -2.03
fuel_rewards |   -4.27 |    0.59 |   -6.78 |   -3.10
glideslope_rewards |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.1989   2.6505  10.1918  96.9288  38.5936  26.9699
***** Episode 65728, Mean R = -9.5  Std R = 4.6  Min R = -22.0
PolicyLoss: 1.93
Policy_Entropy: 0.226
Policy_KL: 0.00903
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps: 2.42e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.00173


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0021   5.5684   2.2087   2.1342
ADVA:  (21922,) (35204,) 0.6227133280309056
ADV1:  0.00042644160173783453 -0.0005888751664463693 0.008589890627744895 0.06721914637019333 -0.12118533987574309
ADVB:  (19810,) (35204,) 0.5627201454380184
ADV2:  0.08973160934981089 0.27361168160318283 0.4021134526612171 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8057   0.4038   1.6558  96.9288  38.5936  26.9699
***** Episode 65759, Mean R = -10.4  Std R = 5.7  Min R = -29.9
PolicyLoss: 1.46
Policy_Entropy: 0.226
Policy_KL: 0.00712
Policy_SD: 0.533
Steps: 1.17e+04
TotalS

ADVA:  (20053,) (35215,) 0.5694448388470822
ADV1:  0.0008855595727404299 0.0003327219074342999 0.00788182805294357 0.09843791897987059 -0.10336671217271098
ADVB:  (21658,) (35215,) 0.6150220076671873
ADV2:  0.19095957133620664 0.3812389022880806 0.49573103049464573 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4771   0.1856   0.8914  96.9288  38.5936  26.9699
***** Episode 65976, Mean R = -9.3  Std R = 4.7  Min R = -24.0
PolicyLoss: 1.86
Policy_Entropy: 0.226
Policy_KL: 0.00697
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 2.43e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.00294


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0055   5.5684   2.2087   2.1342
ADVA:  (21525,) (35217,) 0.6112104949314252
ADV1:  0.0007345289439780406 -0.00023614827007599032 0.008374722427664967 0.09843791897987059 -0.10169435293464701
ADVB:  (19976,) (35217,) 0.5672260555981486
ADV2:  0.10137905264983135 0.30155127361664724 0.427181198522326

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0010   0.0037   5.5684   2.2087   2.1342
ADVA:  (20041,) (35484,) 0.5647897644008567
ADV1:  0.0008116398072955354 0.0002476036855245843 0.0076597202325917055 0.05099293436202135 -0.0766178462959724
ADVB:  (21485,) (35484,) 0.605484161875775
ADV2:  0.18561059882941264 0.38778214865184707 0.49614311799290817 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4418   0.1676   0.8254  96.9288  38.5936  26.9699
***** Episode 66224, Mean R = -7.9  Std R = 3.1  Min R = -14.9
PolicyLoss: 1.93
Policy_Entropy: 0.226
Policy_KL: 0.00759
Policy_SD: 0.532
Steps: 1.21e+04
TotalSteps: 2.44e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.000411


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0020   0.0075   5.5684   2.2087   2.1342
ADVA:  (19310,) (35738,) 0.5403212267054676
ADV1:  0.0007838914502153538 0.00034520635080247726 0.007523370366804391 0.049513231706079686 -0.0766178462959724
ADVB

***** Episode 66441, Mean R = -8.9  Std R = 3.6  Min R = -20.4
PolicyLoss: 1.98
Policy_Entropy: 0.226
Policy_KL: 0.0072
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 2.44e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.000378


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0005   0.0018   5.5684   2.2087   2.1342
ADVA:  (19807,) (35094,) 0.5643984726733915
ADV1:  0.0007675922838949745 0.00030064647481053806 0.007032972075594756 0.051514431544465056 -0.07183367218625358
ADVB:  (21437,) (35094,) 0.6108451587165897
ADV2:  0.2033150961525784 0.3968681170517185 0.5013307796926293 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3467   0.1487   0.7121  96.9288  38.5936  26.9699
***** Episode 66472, Mean R = -10.3  Std R = 5.0  Min R = -23.7
PolicyLoss: 1.95
Policy_Entropy: 0.227
Policy_KL: 0.0062
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 2.44e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.000495


ValFun  Grad

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.01    0.11 |    0.65    1.87 |   -1.50   -3.13 |    1.47    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.04   -0.02 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.48
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.93   -0.98 |    0.93    1.00
cs_angles |  0.0008  0.0037 |  0.0727  0.0757 | -0.9289 -0.9760 |  0.9290  0.9961
optical_flow |  0.0001 -0.0002 |  0.0172  0.0194 | -1.0271 -1.0174 |  0.8663  1.0360
v_err    | -0.0113 |  0.0603 | -0.4568 |  0.1206
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.04
tracking_rewards |  -15.01 |    4.26 |  -33.39 |   -8.17
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2677   0.1132   0.5688  96.9288  38.5936  26.9699
Update Cnt = 2160    ET =   1455.5   Stats:  Mean, Std, Min, Max
r_f      |    3.53    4.45    4.29 |  189.87  172.35  195.62 | -389.63 -370.09 -385.96 |  389.17  361.49  383.10
v_f      |   -0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.10    0.10    0.10
r_i      |   11.44   14.49   10.52 |  707.14  671.52  732.12 |-1371.66-1341.66-1320.30 | 1306.56 1323.14 1329.94
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.03 |    0.39
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs_f     |    1.17 |    1.88 |    0.01 |   20.81
thrust   |    0.00   -0.00   -0.00 |    0.67    0.67    0.67 |   -3.42   -3.45   -3.36 |    3.37    3.38    3.44
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.49 |    0.19 |    1.06 |    2.11
rewards  |   -9.86 

ADVA:  (22447,) (35273,) 0.6363791001615967
ADV1:  0.00010919542958315841 -0.0004258939961032661 0.008472988204353105 0.04801068975332202 -0.0701954474049226
ADVB:  (17675,) (35273,) 0.501091486406033
ADV2:  0.0025826360306653753 0.30183408747402396 0.46481642316115346 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7814   0.3110   1.4324  96.9288  38.5936  26.9699
***** Episode 67278, Mean R = -10.1  Std R = 5.2  Min R = -23.7
PolicyLoss: 1.81
Policy_Entropy: 0.227
Policy_KL: 0.00983
Policy_SD: 0.533
Steps: 1.18e+04
TotalSteps: 2.48e+07
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.00065


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0018   5.5684   2.2087   2.1342
ADVA:  (22502,) (34986,) 0.6431715543360201
ADV1:  0.0005823213153037018 -0.0005283021310655561 0.009323922788867971 0.04801068975332202 -0.06394140449879043
ADVB:  (19654,) (34986,) 0.5617675641685246
ADV2:  0.10978418637054665 0.32109748800831356 0.43004664322

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0005   5.5684   2.2087   2.1342
ADVA:  (20202,) (35286,) 0.5725216799863969
ADV1:  -0.0003607792702493987 -0.0016800633630743092 0.007712298487848117 0.07195046888056006 -0.07022044876282307
ADVB:  (15955,) (35286,) 0.45216233066938727
ADV2:  0.0 0.2449088355826457 0.4070592082710655 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5263   0.1814   0.8569  96.9288  38.5936  26.9699
***** Episode 67526, Mean R = -8.6  Std R = 4.4  Min R = -20.2
PolicyLoss: 1.62
Policy_Entropy: 0.227
Policy_KL: 0.011
Policy_SD: 0.532
Steps: 1.17e+04
TotalSteps: 2.48e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.00209


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0013   5.5684   2.2087   2.1342
ADVA:  (20164,) (35170,) 0.573329542223486
ADV1:  0.00014374842147825938 -0.00023330513849104272 0.006891491707103891 0.07195046888056006 -0.07022044876282307
ADVB:  (19277,) (351

***** Episode 67743, Mean R = -8.3  Std R = 4.3  Min R = -19.9
PolicyLoss: 1.81
Policy_Entropy: 0.227
Policy_KL: 0.00792
Policy_SD: 0.533
Steps: 1.16e+04
TotalSteps: 2.49e+07
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.000491


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0014   0.0055   5.5684   2.2087   2.1342
ADVA:  (21846,) (35049,) 0.6232988102370967
ADV1:  0.0009730112203855702 -0.00010679957726613996 0.007923321618982649 0.05559534775167729 -0.06953770482409542
ADVB:  (20965,) (35049,) 0.5981625723986419
ADV2:  0.1611763394308722 0.31245246879416994 0.4007263938128306 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3150   0.0962   0.6005  96.9288  38.5936  26.9699
***** Episode 67774, Mean R = -10.0  Std R = 5.6  Min R = -25.2
PolicyLoss: 1.57
Policy_Entropy: 0.227
Policy_KL: 0.00584
Policy_SD: 0.532
Steps: 1.16e+04
TotalSteps: 2.49e+07
VF_0_ExplainedVarNew: 0.958
VF_0_ExplainedVarOld: 0.956
VF_0_Loss : 0.00069


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4661   0.2091   0.9050  96.9288  38.5936  26.9699
***** Episode 67991, Mean R = -9.7  Std R = 4.5  Min R = -23.3
PolicyLoss: 1.78
Policy_Entropy: 0.227
Policy_KL: 0.00963
Policy_SD: 0.54
Steps: 1.16e+04
TotalSteps: 2.5e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000577


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0020   5.5684   2.2087   2.1342
ADVA:  (20556,) (34938,) 0.5883565172591447
ADV1:  0.0011562699388681739 0.0007888148254205635 0.006495053048549287 0.059403517165217856 -0.07274273157546693
ADVB:  (22040,) (34938,) 0.6308317591161486
ADV2:  0.23097315340418806 0.4023423944911907 0.5010278977902946 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7170   0.2332   1.2927  96.9288  38.5936  26.9699
***** Episode 68022, Mean R = -9.6  Std R = 4.5  Min R = -23.2
PolicyLoss: 1.91
Policy_Entropy: 0.228
Policy_KL: 0.00878
Policy_SD: 0.541
Steps: 1.18e+04
TotalSteps

seeker_angles |    0.00    0.00 |    0.07    0.08 |   -1.00   -1.00 |    0.98    0.97
cs_angles |  0.0011  0.0009 |  0.0726  0.0756 | -0.9980 -0.9955 |  0.9790  0.9735
optical_flow |  0.0001 -0.0001 |  0.0202  0.0193 | -1.0879 -0.9937 |  0.9653  1.3919
v_err    | -0.0113 |  0.0605 | -0.4528 |  0.1002
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -14.99 |    3.98 |  -31.99 |   -8.53
steps    |     379 |      20 |     335 |     418
***** Episode 68270, Mean R = -11.6  Std R = 5.8  Min R = -23.7
PolicyLoss: 1.7
Policy_Entropy: 0.226
Policy_KL: 0.00863
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 2.51e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.000261


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0013   0.0050   5.5684   2.2087   2.1342
ADVA:  (21190,) (35329,) 0.5997905403492881
ADV1:  0.0006917213120164467 -6.210876841819794e-05 0.008218743630177024 0.05422

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.01   -0.06 |    0.66    1.89 |   -1.36   -3.13 |    1.44    3.12
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.04   -0.02 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.35
seeker_angles |    0.00   -0.00 |    0.07    0.07 |   -0.98   -1.00 |    0.99    1.00
cs_angles |  0.0017 -0.0004 |  0.0732  0.0728 | -0.9834 -0.9985 |  0.9946  0.9970
optical_flow |  0.0001 -0.0000 |  0.0215  0.0193 | -1.0519 -0.9372 |  1.0430  1.1907
v_err    | -0.0112 |  0.0602 | -0.4548 |  0.1032
landing_rewards |    9.61 |    1.93 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.85 |    3.75 |  -29.67 |   -8.73
steps    |     380 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4939   0.1722   0.8529  96.9288  38.5936  26.9699
Update Cnt = 2220    ET =   1331.7   Stats:  Mean, Std, Min, Max
r_f      |  -15.38  -11.28    5.61 |  182.70  169.01  206.20 | -389.76 -390.50 -389.85 |  393.71  371.19  398.41
v_f      |    0.01    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.10    0.09
r_i      |  -91.26  -19.98   46.81 |  654.01  643.31  794.44 |-1323.38-1287.26-1329.59 | 1357.44 1230.89 1333.87
v_i      |    0.01    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.02 |    0.41
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.50 |    2.39 |    0.00 |   25.91
thrust   |   -0.00    0.00   -0.00 |    0.67    0.65    0.66 |   -3.23   -3.06   -3.45 |    3.28    3.29    3.41
norm_thrust |    0.88 |    0.72 |    0.00 |    3.46
fuel     |    1.46 |    0.17 |    1.02 |    2.22
rewards  |   -9.21 

ADVA:  (21633,) (35089,) 0.6165179970931061
ADV1:  0.00018114589282829455 -0.0005164806723106587 0.007521036340154853 0.06914789651742653 -0.0665292443177785
ADVB:  (18316,) (35089,) 0.5219869474764172
ADV2:  0.03404059320358474 0.27048703461193657 0.42289420740238304 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5292   0.2008   0.9129  96.9288  38.5936  26.9699
***** Episode 69138, Mean R = -10.1  Std R = 6.0  Min R = -29.6
PolicyLoss: 1.54
Policy_Entropy: 0.229
Policy_KL: 0.00815
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 2.55e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.000801


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0029   5.5684   2.2087   2.1342
ADVA:  (19662,) (35173,) 0.5590083302533193
ADV1:  0.00039111996092838774 -0.00016981490940505908 0.007058220190482731 0.04231663994863216 -0.06015160765501493
ADVB:  (20154,) (35173,) 0.5729963324140676
ADV2:  0.12111165617730969 0.33966419564395495 0.476100526

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0010   0.0037   5.5684   2.2087   2.1342
ADVA:  (20490,) (35147,) 0.5829800551967451
ADV1:  0.0006527980018210975 5.193121628388654e-05 0.007292239693776508 0.05542877843843397 -0.05773502630121147
ADVB:  (21402,) (35147,) 0.6089282157794407
ADV2:  0.17869727621161755 0.35837929490683695 0.4543098099891724 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5475   0.2564   1.1957  96.9288  38.5936  26.9699
***** Episode 69386, Mean R = -9.1  Std R = 4.6  Min R = -23.5
PolicyLoss: 1.75
Policy_Entropy: 0.229
Policy_KL: 0.00687
Policy_SD: 0.533
Steps: 1.16e+04
TotalSteps: 2.55e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.000409


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0006   0.0024   5.5684   2.2087   2.1342
ADVA:  (19515,) (35142,) 0.5553184224005464
ADV1:  0.0004955716019893579 3.491533187423989e-05 0.006646390530258057 0.05542877843843397 -0.0722171865145943
ADVB: 

***** Episode 69603, Mean R = -9.1  Std R = 5.7  Min R = -23.4
PolicyLoss: 1.82
Policy_Entropy: 0.229
Policy_KL: 0.0121
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 2.56e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.00044


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0010   5.5684   2.2087   2.1342
ADVA:  (20177,) (35066,) 0.5754006730165973
ADV1:  0.0007923449778825097 0.00032014038186595456 0.008602268893193914 0.13560521226556688 -0.11747330943566786
ADVB:  (21477,) (35066,) 0.6124736211715052
ADV2:  0.16237378172383765 0.36483917626936324 0.4983291756500873 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.4967   1.1858   5.5181  96.9288  38.5936  26.9699
***** Episode 69634, Mean R = -8.4  Std R = 3.6  Min R = -17.9
PolicyLoss: 1.78
Policy_Entropy: 0.229
Policy_KL: 0.00771
Policy_SD: 0.532
Steps: 1.18e+04
TotalSteps: 2.56e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000322


ValFun  Gradie

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2586   0.5664   2.6949  96.9288  38.5936  26.9699
***** Episode 69851, Mean R = -9.5  Std R = 5.2  Min R = -22.3
PolicyLoss: 1.72
Policy_Entropy: 0.229
Policy_KL: 0.0091
Policy_SD: 0.533
Steps: 1.17e+04
TotalSteps: 2.57e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.000414


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0013   0.0052   5.5684   2.2087   2.1342
ADVA:  (22613,) (34966,) 0.6467139506949608
ADV1:  0.0015792002698245362 -0.00011549782291486922 0.009587070013369855 0.06476220605602423 -0.09586576683499026
ADVB:  (21521,) (34966,) 0.6154836126522908
ADV2:  0.19545990932985333 0.3390062162791325 0.40884417530762274 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5540   0.2575   1.2166  96.9288  38.5936  26.9699
***** Episode 69882, Mean R = -11.1  Std R = 4.8  Min R = -24.0
PolicyLoss: 1.64
Policy_Entropy: 0.23
Policy_KL: 0.0065
Policy_SD: 0.532
Steps: 1.17e+04
TotalSte

attitude |   -0.04    0.02   -0.10 |    1.22    0.69    1.83 |   -3.14   -1.57   -3.14 |    3.14    1.54    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.04   -0.00 |    0.69    1.84 |   -1.48   -3.12 |    1.47    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.02 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.56
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.97   -0.91 |    0.97    0.96
cs_angles |  0.0013  0.0020 |  0.0722  0.0731 | -0.9726 -0.9093 |  0.9729  0.9589
optical_flow |  0.0001 -0.0001 |  0.0191  0.0186 | -1.0153 -0.9430 |  1.0735  1.1125
v_err    | -0.0115 |  0.0608 | -0.4572 |  0.1070
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8328   0.4120   1.6862  96.9288  38.5936  26.9699
Update Cnt = 2270    ET =   1298.8   Stats:  Mean, Std, Min, Max
r_f      |   -1.42    0.77   21.49 |  178.87  164.36  210.29 | -391.57 -393.56 -381.09 |  390.23  384.46  399.38
v_f      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.10    0.10
r_i      |   12.64   -0.08   49.43 |  676.38  629.51  792.87 |-1388.36-1274.50-1284.21 | 1300.51 1301.68 1369.40
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.14 |    0.07 |    0.02 |    0.43
norm_vf  |    0.08 |    0.01 |    0.04 |    0.11
gs_f     |    1.47 |    2.24 |    0.01 |   21.69
thrust   |   -0.00   -0.00   -0.01 |    0.67    0.66    0.66 |   -3.46   -3.29   -3.38 |    3.37    3.38    3.44
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.48 |    0.17 |    1.10 |    2.17
rewards  |   -9.68 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0908   0.5255   2.4741  96.9288  38.5936  26.9699
***** Episode 70688, Mean R = -8.3  Std R = 4.0  Min R = -19.3
PolicyLoss: 1.68
Policy_Entropy: 0.23
Policy_KL: 0.00658
Policy_SD: 0.53
Steps: 1.17e+04
TotalSteps: 2.6e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.000926


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0011   5.5684   2.2087   2.1342
ADVA:  (20384,) (35249,) 0.5782859088201083
ADV1:  0.0008388550692789883 0.00031610156571926695 0.006694879344154238 0.07632275186738346 -0.0661192215690905
ADVB:  (20896,) (35249,) 0.5928111435785413
ADV2:  0.17323134615261318 0.36877369360025136 0.48332642553144767 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1454   0.5385   2.5035  96.9288  38.5936  26.9699
***** Episode 70719, Mean R = -10.1  Std R = 4.9  Min R = -24.3
PolicyLoss: 1.84
Policy_Entropy: 0.23
Policy_KL: 0.00682
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps

ADVA:  (19423,) (35498,) 0.5471575863428926
ADV1:  0.0008019373436474724 0.0003456345542747137 0.006672348141655921 0.060049743036657 -0.07088726565488462
ADVB:  (21896,) (35498,) 0.6168234830131275
ADV2:  0.212143009523914 0.39146357029364176 0.49144379328799714 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4301   0.1683   0.8661  96.9288  38.5936  26.9699
***** Episode 70936, Mean R = -11.7  Std R = 5.0  Min R = -22.9
PolicyLoss: 1.88
Policy_Entropy: 0.23
Policy_KL: 0.00753
Policy_SD: 0.535
Steps: 1.19e+04
TotalSteps: 2.61e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.000952


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0030   5.5684   2.2087   2.1342
ADVA:  (19548,) (35436,) 0.5516423975618016
ADV1:  0.00024793826471963126 -7.340766455741225e-06 0.006676052219644265 0.0461494512932194 -0.07740938861323282
ADVB:  (19532,) (35436,) 0.551190879331753
ADV2:  0.09426391653863458 0.34108146617335633 0.4798976386860491 3

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0017   0.0059   5.5684   2.2087   2.1342
ADVA:  (16421,) (35044,) 0.4685823536126013
ADV1:  0.0010494556534611788 0.0008329007804932905 0.0056213918650568385 0.045060952848684555 -0.056923719011751125
ADVB:  (24059,) (35044,) 0.6865369250085607
ADV2:  0.4184046135917389 0.5706630458219811 0.6146128924993999 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  23.7799  15.0105  44.6423  96.9288  38.5936  26.9699
***** Episode 71184, Mean R = -8.0  Std R = 3.4  Min R = -16.2
PolicyLoss: 2.52
Policy_Entropy: 0.226
Policy_KL: 0.0802
Policy_SD: 0.538
Steps: 1.19e+04
TotalSteps: 2.62e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.000682


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0015   0.0062   5.5684   2.2087   2.1342
ADVA:  (18484,) (35426,) 0.5217636763958674
ADV1:  0.0 -0.0002076326098642819 0.0061922019287887695 0.045060952848684555 -0.054632705776079774
ADVB:  (18783,) (35

***** Episode 71401, Mean R = -7.9  Std R = 3.4  Min R = -17.7
PolicyLoss: 1.95
Policy_Entropy: 0.231
Policy_KL: 0.00586
Policy_SD: 0.529
Steps: 1.17e+04
TotalSteps: 2.63e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00373


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0016   0.0066   5.5684   2.2087   2.1342
ADVA:  (18532,) (35129,) 0.5275413476045433
ADV1:  0.00033343150733146365 0.00014081273870204317 0.006455613029326647 0.062475173300704845 -0.06032581787029284
ADVB:  (20579,) (35129,) 0.5858122918386518
ADV2:  0.17331146653178103 0.41988551797636836 0.5521134126724458 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8177   0.3166   1.5249  96.9288  38.5936  26.9699
***** Episode 71432, Mean R = -9.2  Std R = 4.3  Min R = -22.1
PolicyLoss: 2.13
Policy_Entropy: 0.231
Policy_KL: 0.00555
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 2.63e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00187


ValFun  Gra

a_f      |    0.09   -0.02 |    0.65    1.85 |   -1.44   -3.13 |    1.38    3.12
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.02 |    0.03    0.03    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.33
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.96   -0.95 |    0.99    1.00
cs_angles |  0.0014  0.0031 |  0.0733  0.0738 | -0.9622 -0.9499 |  0.9882  0.9997
optical_flow | -0.0001 -0.0000 |  0.0192  0.0187 | -1.1515 -1.2164 |  1.1697  0.9868
v_err    | -0.0113 |  0.0602 | -0.4520 |  0.0982
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.01
tracking_rewards |  -14.70 |    3.96 |  -27.60 |   -7.95
steps    |     379 |      21 |     333 |     417
***** Episode 71680, Mean R = -9.6  Std R = 4.8  Min R = -22.7
PolicyLoss: 1.84
Policy

Update Cnt = 2320    ET =    883.7   Stats:  Mean, Std, Min, Max
r_f      |   -5.51    4.66   -7.87 |  186.24  164.09  208.65 | -396.23 -390.46 -380.53 |  377.29  383.76  384.71
v_f      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.08    0.09    0.09
r_i      |   -3.66   13.95  -16.04 |  680.86  648.51  787.46 |-1301.11-1257.82-1284.60 | 1338.07 1325.85 1305.74
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.14 |    0.05 |    0.03 |    0.29
norm_vf  |    0.08 |    0.01 |    0.04 |    0.11
gs_f     |    1.53 |    3.37 |    0.00 |   49.64
thrust   |    0.00   -0.01   -0.00 |    0.66    0.67    0.65 |   -3.38   -3.42   -3.38 |    3.41    3.39    3.28
norm_thrust |    0.88 |    0.73 |    0.00 |    3.46
fuel     |    1.47 |    0.18 |    1.01 |    2.04
rewards  |   -9.40 |    5.07 |  -26.88 |   -2.03
fuel_rewards |   -4.20 |    0.52 |   -5.83 |   -2.90
glideslope_rewards |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7705   0.3022   1.5947  96.9288  38.5936  26.9699
***** Episode 72238, Mean R = -9.0  Std R = 4.2  Min R = -17.3
PolicyLoss: 1.91
Policy_Entropy: 0.231
Policy_KL: 0.00564
Policy_SD: 0.532
Steps: 1.2e+04
TotalSteps: 2.66e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.000902


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0049   5.5684   2.2087   2.1342
ADVA:  (18591,) (35476,) 0.5240444243995941
ADV1:  0.000253741690534123 0.00013168773751680933 0.005951492360668591 0.047610334414139976 -0.0776555535047449
ADVB:  (20372,) (35476,) 0.5742473785094148
ADV2:  0.17461944703793217 0.4475795691880396 0.5935665478566704 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9957   0.4825   2.1842  96.9288  38.5936  26.9699
***** Episode 72269, Mean R = -9.3  Std R = 4.2  Min R = -19.0
PolicyLoss: 2.3
Policy_Entropy: 0.231
Policy_KL: 0.00697
Policy_SD: 0.528
Steps: 1.18e+04
TotalSteps: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.3273   1.2167   5.0315  96.9288  38.5936  26.9699
***** Episode 72486, Mean R = -8.7  Std R = 3.9  Min R = -17.7
PolicyLoss: 1.92
Policy_Entropy: 0.231
Policy_KL: 0.0102
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 2.67e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00124


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0030   5.5684   2.2087   2.1342
ADVA:  (18715,) (35482,) 0.5274505383011104
ADV1:  0.0 -2.1380274401790178e-05 0.0050337301714498866 0.08479360742196101 -0.06616865780077685
ADVB:  (18472,) (35482,) 0.5206019953779382
ADV2:  0.05263881737558775 0.41968012416094436 0.6108732699907813 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3610   0.7427   3.2580  96.9288  38.5936  26.9699
***** Episode 72517, Mean R = -9.5  Std R = 4.8  Min R = -20.1
PolicyLoss: 2.38
Policy_Entropy: 0.23
Policy_KL: 0.00864
Policy_SD: 0.536
Steps: 1.2e+04
TotalSteps: 2.67e+07
VF_0_E

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3486   0.1473   0.7527  96.9288  38.5936  26.9699
***** Episode 72734, Mean R = -8.1  Std R = 4.1  Min R = -20.9
PolicyLoss: 1.7
Policy_Entropy: 0.231
Policy_KL: 0.00738
Policy_SD: 0.526
Steps: 1.18e+04
TotalSteps: 2.68e+07
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.000776


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0006   0.0023   5.5684   2.2087   2.1342
ADVA:  (22998,) (35421,) 0.64927585330736
ADV1:  0.0013687941103245737 0.00010087528154650513 0.008285290977276673 0.04360969103777823 -0.0713687146856053
ADVB:  (21612,) (35421,) 0.6101465232489202
ADV2:  0.16544004656826083 0.3090692358606513 0.37877487904672863 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7163   0.3074   1.2961  96.9288  38.5936  26.9699
***** Episode 72765, Mean R = -10.1  Std R = 4.3  Min R = -21.2
PolicyLoss: 1.5
Policy_Entropy: 0.231
Policy_KL: 0.00637
Policy_SD: 0.525
Steps: 1.17e+04
TotalSteps: 

ADVA:  (20299,) (35237,) 0.576070607599966
ADV1:  0.0 -0.0005025249643822296 0.007120489515620441 0.04436872777410833 -0.10388883029748774
ADVB:  (17107,) (35237,) 0.48548400828674404
ADV2:  0.0 0.27549629855977303 0.44681984774786154 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4682   0.6039   2.8763  96.9288  38.5936  26.9699
***** Episode 72982, Mean R = -10.7  Std R = 5.3  Min R = -26.8
PolicyLoss: 1.68
Policy_Entropy: 0.231
Policy_KL: 0.0113
Policy_SD: 0.531
Steps: 1.18e+04
TotalSteps: 2.69e+07
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.96
VF_0_Loss : 0.00405


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0014   0.0053   5.5684   2.2087   2.1342
ADVA:  (19558,) (35030,) 0.5583214387667713
ADV1:  0.00023189072288908388 6.374634908100441e-05 0.006900655243900133 0.04436872777410833 -0.10388883029748774
ADVB:  (19449,) (35030,) 0.5552098201541535
ADV2:  0.09588299957127157 0.33816272617753274 0.49180709517130644 3.0 0.0
Policy  Gradients: u/s

seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.97   -0.92 |    1.00    1.00
cs_angles |  0.0028  0.0013 |  0.0696  0.0740 | -0.9692 -0.9242 |  0.9979  0.9981
optical_flow |  0.0001  0.0000 |  0.0197  0.0191 | -1.0932 -1.0921 |  1.0697  1.1724
v_err    | -0.0113 |  0.0606 | -0.4567 |  0.1010
landing_rewards |    9.61 |    1.93 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -14.52 |    3.77 |  -28.37 |   -7.74
steps    |     379 |      20 |     332 |     423
***** Episode 73230, Mean R = -8.3  Std R = 4.7  Min R = -19.8
PolicyLoss: 2.02
Policy_Entropy: 0.232
Policy_KL: 0.00694
Policy_SD: 0.524
Steps: 1.18e+04
TotalSteps: 2.7e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.000532


Dynamics: Max Disturbance (m/s^2):  [0.00133134 0.00141043 0.00142314] 0.0024056412678452664
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0022   5.5684   2.2087   2.1342
ADVA:  (20352,) (35263,) 0.577148

attitude |   -0.15   -0.01   -0.18 |    1.16    0.64    1.85 |   -3.14   -1.56   -3.14 |    3.14    1.52    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.02   -0.15 |    0.63    1.86 |   -1.35   -3.14 |    1.39    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.03 |    0.03    0.03    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.29
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.95   -0.99 |    0.96    1.00
cs_angles |  0.0036  0.0037 |  0.0745  0.0736 | -0.9518 -0.9940 |  0.9579  0.9995
optical_flow |  0.0001 -0.0001 |  0.0188  0.0189 | -0.8833 -1.0626 |  0.8832  0.9756
v_err    | -0.0113 |  0.0605 | -0.4547 |  0.1154
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5919   0.7259   2.9481  96.9288  38.5936  26.9699
Update Cnt = 2380    ET =    799.2   Stats:  Mean, Std, Min, Max
r_f      |   10.95  -22.51    1.00 |  191.58  161.28  204.24 | -370.61 -389.57 -374.56 |  393.92  352.69  390.00
v_f      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.10    0.09
r_i      |   23.01  -71.92   15.89 |  678.88  620.64  793.75 |-1256.63-1327.17-1308.62 | 1341.66 1300.80 1276.77
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.08    0.08    0.10
norm_rf  |    0.14 |    0.05 |    0.02 |    0.36
norm_vf  |    0.08 |    0.01 |    0.05 |    0.11
gs_f     |    1.32 |    1.82 |    0.01 |   21.02
thrust   |   -0.00    0.00   -0.00 |    0.65    0.67    0.66 |   -3.42   -3.36   -3.41 |    3.41    3.44    3.36
norm_thrust |    0.88 |    0.73 |    0.00 |    3.46
fuel     |    1.47 |    0.18 |    1.08 |    2.07
rewards  |   -9.59 

ADVA:  (23588,) (34929,) 0.6753127773483352
ADV1:  0.0019523554588291667 0.0010931510962145987 0.009666120657637021 0.07094942754654954 -0.06868073408508008
ADVB:  (21420,) (34929,) 0.6132440092759598
ADV2:  0.19383690004932105 0.4159740784390734 0.5247779314775058 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.5824   1.3002   5.2616  96.9288  38.5936  26.9699
***** Episode 74098, Mean R = -8.7  Std R = 4.5  Min R = -18.6
PolicyLoss: 2.01
Policy_Entropy: 0.233
Policy_KL: 0.00983
Policy_SD: 0.529
Steps: 1.16e+04
TotalSteps: 2.73e+07
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.00046


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0016   5.5684   2.2087   2.1342
ADVA:  (20999,) (34957,) 0.6007094430300083
ADV1:  0.0020770530380636813 0.001981662174564386 0.00792950439959518 0.04134292199021844 -0.06868073408508008
ADVB:  (23593,) (34957,) 0.6749148954429728
ADV2:  0.3366594866491696 0.5552594418674641 0.6373824021794603 3.0 0

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0011   0.0043   5.5684   2.2087   2.1342
ADVA:  (20244,) (35127,) 0.5763088222734648
ADV1:  0.00022380377868406152 -0.0006297326189245703 0.008113816999657 0.03812124200912026 -0.07283228182428642
ADVB:  (20311,) (35127,) 0.5782161869786774
ADV2:  0.12789883801375856 0.31977519711237823 0.4231931769505235 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3509   0.1203   0.7348  96.9288  38.5936  26.9699
***** Episode 74346, Mean R = -9.5  Std R = 5.3  Min R = -24.8
PolicyLoss: 1.63
Policy_Entropy: 0.231
Policy_KL: 0.0103
Policy_SD: 0.531
Steps: 1.16e+04
TotalSteps: 2.74e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.000453


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0032   0.0018   0.0068   5.5684   2.2087   2.1342
ADVA:  (22275,) (34959,) 0.6371749764009268
ADV1:  0.0016425230381327469 0.0005660285146747437 0.008683731827096738 0.05926610235077012 -0.07283228182428642
ADVB:  

Dynamics: Max Disturbance (m/s^2):  [0.00133134 0.00141043 0.00142314] 0.0024056412678452664
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0030   5.5684   2.2087   2.1342
ADVA:  (21641,) (34949,) 0.6192165727202495
ADV1:  0.00023398091672157187 -0.0005570042417013015 0.007859757291248992 0.06485635020470681 -0.07824416677248008
ADVB:  (18551,) (34949,) 0.530802025809036
ADV2:  0.049050164130843926 0.2650639819501376 0.4062969370384206 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6439   0.2727   1.1053  96.9288  38.5936  26.9699
***** Episode 74594, Mean R = -9.9  Std R = 5.5  Min R = -24.5
PolicyLoss: 1.46
Policy_Entropy: 0.233
Policy_KL: 0.00635
Policy_SD: 0.533
Steps: 1.17e+04
TotalSteps: 2.75e+07
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 0.00111


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0010   0.0038   5.5684   2.2087   2.1342
ADVA:  (19389,) (34967,) 0.5544942374238567
ADV1:  0.0008469462993816

***** Episode 74811, Mean R = -9.6  Std R = 6.1  Min R = -29.1
PolicyLoss: 2.1
Policy_Entropy: 0.233
Policy_KL: 0.00819
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 2.76e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.00114


Dynamics: Max Disturbance (m/s^2):  [0.00133134 0.00141043 0.00142314] 0.0024056412678452664
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0015   0.0056   5.5684   2.2087   2.1342
ADVA:  (21199,) (35071,) 0.6044595249636452
ADV1:  0.0007183238797394374 0.000290904669658002 0.008143253160210534 0.07170324137147799 -0.075606900447653
ADVB:  (20383,) (35071,) 0.5811924381968008
ADV2:  0.12835431354835505 0.3485334060419943 0.4902241905338387 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8852   0.3027   1.3919  96.9288  38.5936  26.9699
***** Episode 74842, Mean R = -10.1  Std R = 7.1  Min R = -36.6
PolicyLoss: 1.77
Policy_Entropy: 0.232
Policy_KL: 0.0102
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 2.76e+07
VF_

seeker_angles |    0.00    0.00 |    0.07    0.07 |   -1.00   -0.98 |    0.99    0.97
cs_angles |  0.0038  0.0035 |  0.0748  0.0740 | -0.9978 -0.9788 |  0.9894  0.9681
optical_flow |  0.0001 -0.0001 |  0.0187  0.0190 | -1.0379 -0.9405 |  1.0495  1.1566
v_err    | -0.0110 |  0.0600 | -0.4528 |  0.1016
landing_rewards |    9.52 |    2.15 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.71 |    4.09 |  -31.21 |   -7.48
steps    |     378 |      20 |     336 |     420
***** Episode 75090, Mean R = -8.8  Std R = 4.6  Min R = -18.8
PolicyLoss: 1.6
Policy_Entropy: 0.232
Policy_KL: 0.0101
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 2.77e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.00047


Dynamics: Max Disturbance (m/s^2):  [0.00133134 0.00141043 0.00142314] 0.0024056412678452664
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0007   5.5684   2.2087   2.1342
ADVA:  (20883,) (35079,) 0.5953134

seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.99   -0.97 |    1.00    0.99
cs_angles |  0.0018  0.0003 |  0.0736  0.0754 | -0.9856 -0.9664 |  0.9994  0.9888
optical_flow |  0.0002 -0.0000 |  0.0191  0.0182 | -1.0918 -0.9399 |  1.0753  0.9901
v_err    | -0.0112 |  0.0607 | -0.4561 |  0.1011
landing_rewards |    9.19 |    2.72 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -14.93 |    4.17 |  -31.98 |   -7.58
steps    |     378 |      20 |     330 |     421
***** Episode 75400, Mean R = -10.6  Std R = 6.2  Min R = -25.4
PolicyLoss: 1.48
Policy_Entropy: 0.233
Policy_KL: 0.00401
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 2.78e+07
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.000692


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0005   0.0020   5.5684   2.2087   2.1342
ADVA:  (20601,) (34726,) 0.5932442550250533
ADV1:  0.0019298465647051791 0.0010574641508609313 0.008184503502762834 0.07324

attitude |   -0.01    0.05    0.02 |    1.27    0.67    1.88 |   -3.14   -1.54   -3.14 |    3.14    1.55    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.05   -0.03 |    0.67    1.90 |   -1.42   -3.14 |    1.55    3.14
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.04   -0.02 |    0.03    0.03    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.62
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.97   -0.94 |    0.99    0.97
cs_angles |  0.0045  0.0049 |  0.0744  0.0742 | -0.9696 -0.9391 |  0.9932  0.9719
optical_flow | -0.0000 -0.0001 |  0.0192  0.0185 | -0.9715 -0.9153 |  1.0978  0.8510
v_err    | -0.0111 |  0.0607 | -0.4532 |  0.0987
landing_rewards |    9.77 |    1.49 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9747   0.7596   4.0260  96.9288  38.5936  26.9699
Update Cnt = 2450    ET =    823.5   Stats:  Mean, Std, Min, Max
r_f      |    2.70    8.35   16.80 |  180.68  176.24  196.50 | -393.21 -369.80 -396.91 |  394.89  372.72  386.52
v_f      |    0.00   -0.00   -0.00 |    0.04    0.05    0.05 |   -0.11   -0.10   -0.10 |    0.09    0.10    0.09
r_i      |   -3.99   44.35   75.72 |  669.53  669.46  758.92 |-1340.18-1340.55-1295.52 | 1253.23 1304.38 1360.56
v_i      |    0.00   -0.00   -0.01 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.01 |    0.39
norm_vf  |    0.08 |    0.01 |    0.05 |    0.13
gs_f     |    1.29 |    1.95 |    0.00 |   20.66
thrust   |    0.01    0.00   -0.00 |    0.68    0.67    0.67 |   -3.29   -3.45   -3.43 |    3.45    3.37    3.45
norm_thrust |    0.91 |    0.72 |    0.00 |    3.46
fuel     |    1.51 |    0.19 |    1.12 |    2.07
rewards  |   -9.77 

ADVA:  (21008,) (34608,) 0.6070272769301895
ADV1:  0.0008717518751931589 0.000201531384703578 0.00731977850793223 0.05337272958896261 -0.06697619502213709
ADVB:  (19777,) (34608,) 0.5714574664817383
ADV2:  0.14187061047408583 0.35577342443503235 0.46977069117749126 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1582   0.4827   2.3694  96.9288  38.5936  26.9699
***** Episode 76268, Mean R = -9.6  Std R = 5.5  Min R = -26.1
PolicyLoss: 1.83
Policy_Entropy: 0.234
Policy_KL: 0.00531
Policy_SD: 0.538
Steps: 1.14e+04
TotalSteps: 2.82e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.000371


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0032   5.5684   2.2087   2.1342
ADVA:  (22158,) (34840,) 0.6359931113662457
ADV1:  0.00029700232444385653 -0.0006245463770627203 0.008024456307048674 0.05337272958896261 -0.0754569669201492
ADVB:  (17981,) (34840,) 0.5161021814006889
ADV2:  0.029570628475090704 0.264441144806718 0.3896643959334577

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0018   5.5684   2.2087   2.1342
ADVA:  (21052,) (35051,) 0.6006105389289892
ADV1:  0.00039326735390865855 -0.00035387023778228805 0.007866023607601129 0.05177244419347843 -0.06060110776897093
ADVB:  (20413,) (35051,) 0.582379960628798
ADV2:  0.12699140165157993 0.3122651944872822 0.4216673901496591 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6009   0.2361   1.1094  96.9288  38.5936  26.9699
***** Episode 76516, Mean R = -9.5  Std R = 4.9  Min R = -24.5
PolicyLoss: 1.58
Policy_Entropy: 0.233
Policy_KL: 0.00791
Policy_SD: 0.538
Steps: 1.17e+04
TotalSteps: 2.82e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.000486


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   5.5684   2.2087   2.1342
ADVA:  (22347,) (35065,) 0.6373021531441608
ADV1:  0.0004305097863392081 -0.0004122379723446048 0.00790912534115646 0.05177244419347843 -0.05317896145125012
ADVB

***** Episode 76733, Mean R = -9.4  Std R = 5.8  Min R = -28.4
PolicyLoss: 1.61
Policy_Entropy: 0.232
Policy_KL: 0.00721
Policy_SD: 0.544
Steps: 1.18e+04
TotalSteps: 2.83e+07
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.000643


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0022   5.5684   2.2087   2.1342
ADVA:  (24774,) (35318,) 0.7014553485474828
ADV1:  0.0 -0.0011057913331237199 0.009282995449764457 0.06750193615170585 -0.06710731337829967
ADVB:  (15190,) (35318,) 0.4300923042074863
ADV2:  0.0 0.2121016907295425 0.3916272349412594 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0872   0.3742   2.0331  96.9288  38.5936  26.9699
***** Episode 76764, Mean R = -10.5  Std R = 5.1  Min R = -21.0
PolicyLoss: 1.45
Policy_Entropy: 0.232
Policy_KL: 0.00991
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps: 2.83e+07
VF_0_ExplainedVarNew: 0.944
VF_0_ExplainedVarOld: 0.938
VF_0_Loss : 0.000588


ValFun  Gradients: u/sd/Max/C Max/Max u/Max 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2359   0.4083   1.9868  96.9288  38.5936  26.9699
***** Episode 76981, Mean R = -9.2  Std R = 4.3  Min R = -20.0
PolicyLoss: 2.13
Policy_Entropy: 0.233
Policy_KL: 0.00862
Policy_SD: 0.539
Steps: 1.19e+04
TotalSteps: 2.84e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000325


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0013   5.5684   2.2087   2.1342
ADVA:  (20457,) (35215,) 0.580917222774386
ADV1:  0.0 -0.00041561605194140116 0.005601094230365075 0.052736288352239785 -0.07654385117842072
ADVB:  (16923,) (35215,) 0.48056226040039757
ADV2:  0.0 0.30157576881663334 0.4922182841507076 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5849   0.6745   3.0965  96.9288  38.5936  26.9699
***** Episode 77012, Mean R = -8.9  Std R = 4.1  Min R = -19.7
PolicyLoss: 1.85
Policy_Entropy: 0.232
Policy_KL: 0.00887
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 2.84e+07
VF_0_ExplainedVarNe

seeker_angles |    0.00    0.01 |    0.07    0.07 |   -0.98   -0.99 |    0.99    0.93
cs_angles |  0.0028  0.0058 |  0.0718  0.0744 | -0.9837 -0.9892 |  0.9938  0.9346
optical_flow |  0.0001 -0.0001 |  0.0199  0.0186 | -1.2346 -1.1200 |  0.9809  0.9191
v_err    | -0.0112 |  0.0607 | -0.4536 |  0.1052
landing_rewards |    9.61 |    1.93 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -14.65 |    3.94 |  -28.79 |   -8.32
steps    |     379 |      21 |     334 |     421
***** Episode 77260, Mean R = -10.5  Std R = 4.9  Min R = -23.6
PolicyLoss: 1.54
Policy_Entropy: 0.233
Policy_KL: 0.00915
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 2.85e+07
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.000769


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0040   5.5684   2.2087   2.1342
ADVA:  (23824,) (35231,) 0.6762226448298374
ADV1:  0.0 -0.0010526166671792642 0.008089151103199181 0.03557018366103584 -0.06

theta_cv |    0.20 |    0.25 |    0.00 |    1.39
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -1.00   -0.97 |    0.95    0.97
cs_angles |  0.0013  0.0042 |  0.0721  0.0739 | -0.9955 -0.9703 |  0.9512  0.9728
optical_flow |  0.0001  0.0000 |  0.0192  0.0194 | -1.0651 -1.2380 |  0.8410  0.9418
v_err    | -0.0111 |  0.0602 | -0.4525 |  0.1021
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -14.67 |    3.90 |  -36.24 |   -7.95
steps    |     379 |      21 |     332 |     419
***** Episode 77570, Mean R = -9.6  Std R = 5.1  Min R = -23.5
PolicyLoss: 2.11
Policy_Entropy: 0.233
Policy_KL: 0.00494
Policy_SD: 0.531
Steps: 1.18e+04
TotalSteps: 2.86e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.00126


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0013   5.5684   2.2087   2.1342
ADVA:  (19202,) (35021,) 0.5482995916735673
ADV1:  0.0011492086165599928 0.0

theta_cv |    0.20 |    0.25 |    0.00 |    1.40
seeker_angles |    0.00    0.01 |    0.07    0.08 |   -0.99   -0.97 |    0.97    0.98
cs_angles |  0.0014  0.0061 |  0.0707  0.0768 | -0.9883 -0.9742 |  0.9658  0.9764
optical_flow |  0.0001 -0.0000 |  0.0187  0.0177 | -1.0896 -1.0937 |  1.0206  0.8900
v_err    | -0.0112 |  0.0606 | -0.4518 |  0.0966
landing_rewards |    9.55 |    2.08 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.67 |    4.50 |  -33.03 |   -8.16
steps    |     377 |      19 |     337 |     416
***** Episode 77880, Mean R = -8.1  Std R = 3.7  Min R = -15.1
PolicyLoss: 2.08
Policy_Entropy: 0.233
Policy_KL: 0.0187
Policy_SD: 0.529
Steps: 1.17e+04
TotalSteps: 2.88e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.000268


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0015   0.0055   5.5684   2.2087   2.1342
ADVA:  (19487,) (35089,) 0.5553592293881273
ADV1:  0.0 -0.000865939537608070

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02    0.00 |    0.67    1.82 |   -1.41   -3.11 |    1.44    3.11
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.03   -0.02 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.36
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.97   -0.98 |    0.96    0.99
cs_angles |  0.0038  0.0034 |  0.0718  0.0751 | -0.9662 -0.9785 |  0.9622  0.9930
optical_flow |  0.0001  0.0001 |  0.0195  0.0179 | -0.9568 -1.0652 |  1.0154  1.0053
v_err    | -0.0112 |  0.0612 | -0.4565 |  0.1097
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.89 |    4.19 |  -29.93 |   -7.99
steps    |     380 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4300   0.1564   0.7692  96.9288  38.5936  26.9699
Update Cnt = 2530    ET =    781.6   Stats:  Mean, Std, Min, Max
r_f      |   -2.98  -10.76    9.56 |  187.38  167.50  196.71 | -391.06 -384.21 -380.79 |  363.99  368.97  379.08
v_f      |   -0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.11    0.09    0.10
r_i      |   -8.50   -1.38   11.22 |  705.55  621.22  772.97 |-1369.13-1293.01-1306.28 | 1364.05 1296.40 1270.68
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.10    0.10    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.34
norm_vf  |    0.08 |    0.01 |    0.04 |    0.11
gs_f     |    1.36 |    1.93 |    0.00 |   15.96
thrust   |    0.00    0.00    0.00 |    0.66    0.68    0.66 |   -3.45   -3.39   -3.41 |    3.40    3.30    3.46
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.49 |    0.20 |    1.06 |    3.46
rewards  |   -9.69 

ADVA:  (19820,) (35135,) 0.5641098619610075
ADV1:  0.0009002858378401087 0.0002688342751061184 0.007165656263080251 0.03976671758330763 -0.06176376990838229
ADVB:  (21257,) (35135,) 0.605009250035577
ADV2:  0.19745235442634393 0.37299369311791947 0.47160799207119347 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8800   0.3516   1.5567  96.9288  38.5936  26.9699
***** Episode 78748, Mean R = -9.4  Std R = 3.7  Min R = -16.4
PolicyLoss: 1.82
Policy_Entropy: 0.233
Policy_KL: 0.00576
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 2.91e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.000301


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0023   5.5684   2.2087   2.1342
ADVA:  (20659,) (35165,) 0.5874875586520688
ADV1:  0.0008865905654497908 -0.00013218467598552852 0.00774134091370247 0.035304167066931874 -0.07080918840067663
ADVB:  (22085,) (35165,) 0.6280392435660458
ADV2:  0.1953137235539701 0.3378619083431031 0.4180658604907201

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0009   5.5684   2.2087   2.1342
ADVA:  (18830,) (35234,) 0.5344269739456207
ADV1:  0.0011208213777396375 0.0006776628563436487 0.0064459952871877575 0.03798943049749881 -0.052346792267765375
ADVB:  (22695,) (35234,) 0.6441221547369018
ADV2:  0.29362731294536004 0.46082305046053185 0.5378052454733894 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7667   0.3506   1.4158  96.9288  38.5936  26.9699
***** Episode 78996, Mean R = -9.4  Std R = 4.7  Min R = -20.6
PolicyLoss: 2.11
Policy_Entropy: 0.234
Policy_KL: 0.00486
Policy_SD: 0.533
Steps: 1.18e+04
TotalSteps: 2.92e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.000636


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0019   5.5684   2.2087   2.1342
ADVA:  (19978,) (35449,) 0.5635701994414511
ADV1:  0.00066185220842482 4.2334949824811865e-05 0.0067431994988296235 0.04819909718148946 -0.052346792267765375
AD

***** Episode 79213, Mean R = -9.4  Std R = 5.0  Min R = -20.9
PolicyLoss: 1.73
Policy_Entropy: 0.233
Policy_KL: 0.00472
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 2.93e+07
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.000269


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0003   5.5684   2.2087   2.1342
ADVA:  (20868,) (35255,) 0.5919160402779747
ADV1:  0.0007151412348635269 -5.308878163414758e-05 0.007711212773296552 0.04750402344289539 -0.06286362464122497
ADVB:  (20809,) (35255,) 0.5902425187916608
ADV2:  0.1545853449035394 0.33632796284915717 0.4363193638385957 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5511   0.2032   0.9701  96.9288  38.5936  26.9699
***** Episode 79244, Mean R = -10.2  Std R = 5.7  Min R = -28.8
PolicyLoss: 1.68
Policy_Entropy: 0.234
Policy_KL: 0.00555
Policy_SD: 0.541
Steps: 1.19e+04
TotalSteps: 2.93e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.000264


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8491   0.4055   1.8095  96.9288  38.5936  26.9699
***** Episode 79461, Mean R = -9.0  Std R = 4.7  Min R = -23.4
PolicyLoss: 1.8
Policy_Entropy: 0.234
Policy_KL: 0.00861
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 2.94e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00021


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0018   0.0071   5.5684   2.2087   2.1342
ADVA:  (20092,) (34888,) 0.5759000229305206
ADV1:  0.0011828788520315964 0.0007117086674692715 0.006779079397404003 0.056011983072934035 -0.07119143957248136
ADVB:  (21570,) (34888,) 0.6182641595964229
ADV2:  0.24082330349660708 0.4287548735553173 0.517738151153747 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5972   0.2348   1.2641  96.9288  38.5936  26.9699
***** Episode 79492, Mean R = -8.7  Std R = 4.7  Min R = -23.8
PolicyLoss: 2.04
Policy_Entropy: 0.234
Policy_KL: 0.00778
Policy_SD: 0.539
Steps: 1.16e+04
TotalSteps:

theta_cv |    0.21 |    0.25 |    0.00 |    1.53
seeker_angles |    0.00    0.01 |    0.07    0.07 |   -0.99   -0.97 |    1.00    0.97
cs_angles |  0.0013  0.0063 |  0.0735  0.0726 | -0.9924 -0.9683 |  0.9961  0.9716
optical_flow |  0.0001 -0.0000 |  0.0190  0.0194 | -1.1781 -1.0711 |  1.1168  1.0952
v_err    | -0.0110 |  0.0604 | -0.4520 |  0.0968
landing_rewards |    9.74 |    1.59 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.01
tracking_rewards |  -14.51 |    3.88 |  -28.22 |   -8.20
steps    |     377 |      20 |     333 |     417
***** Episode 79740, Mean R = -10.8  Std R = 5.4  Min R = -23.2
PolicyLoss: 1.59
Policy_Entropy: 0.234
Policy_KL: 0.00937
Policy_SD: 0.542
Steps: 1.16e+04
TotalSteps: 2.95e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.000274


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0006   5.5684   2.2087   2.1342
ADVA:  (19763,) (35055,) 0.563771216659535
ADV1:  0.0 -0.00070307881943872

seeker_angles |    0.00    0.00 |    0.07    0.07 |   -1.00   -0.98 |    0.98    0.99
cs_angles |  0.0029  0.0040 |  0.0735  0.0748 | -0.9987 -0.9832 |  0.9803  0.9878
optical_flow |  0.0001  0.0001 |  0.0198  0.0195 | -1.1201 -1.1897 |  1.0251  1.1706
v_err    | -0.0110 |  0.0601 | -0.4524 |  0.1093
landing_rewards |    9.52 |    2.15 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -14.67 |    4.02 |  -29.53 |   -8.27
steps    |     379 |      20 |     336 |     418
***** Episode 80050, Mean R = -10.5  Std R = 5.3  Min R = -22.7
PolicyLoss: 1.64
Policy_Entropy: 0.234
Policy_KL: 0.00687
Policy_SD: 0.55
Steps: 1.16e+04
TotalSteps: 2.96e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.000271


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0016   5.5684   2.2087   2.1342
ADVA:  (20576,) (35036,) 0.5872816531567531
ADV1:  0.00048500374670922 -0.00032855134243246536 0.0085086297361012 0.101414274

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.00   -0.04 |    0.66    1.87 |   -1.53   -3.14 |    1.45    3.11
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.04   -0.02 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.52
seeker_angles |    0.00    0.01 |    0.08    0.07 |   -1.00   -0.99 |    1.00    0.99
cs_angles |  0.0010  0.0063 |  0.0758  0.0730 | -0.9966 -0.9910 |  0.9998  0.9930
optical_flow |  0.0000  0.0000 |  0.0194  0.0191 | -1.0570 -1.0105 |  0.9961  1.1032
v_err    | -0.0110 |  0.0603 | -0.4522 |  0.1040
landing_rewards |    9.52 |    2.15 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.59 |    4.80 |  -66.23 |   -7.23
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9356   0.4124   1.8105  96.9288  38.5936  26.9699
Update Cnt = 2600    ET =    817.3   Stats:  Mean, Std, Min, Max
r_f      |  -10.72   -2.51    3.11 |  187.58  173.73  203.62 | -386.93 -379.62 -366.12 |  383.44  387.14  383.28
v_f      |    0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.10    0.10
r_i      |  -39.88   17.91   22.51 |  684.94  667.18  763.40 |-1373.96-1344.24-1298.40 | 1338.56 1331.85 1201.11
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.10    0.10
norm_rf  |    0.13 |    0.06 |    0.03 |    0.33
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f     |    1.20 |    1.54 |    0.01 |   11.77
thrust   |    0.00   -0.00   -0.00 |    0.66    0.68    0.67 |   -3.45   -3.41   -3.42 |    3.36    3.41    3.35
norm_thrust |    0.91 |    0.72 |    0.00 |    3.46
fuel     |    1.52 |    0.19 |    1.05 |    2.20
rewards  |   -9.69 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.8669   0.7258   3.6232  96.9288  38.5936  26.9699
***** Episode 80918, Mean R = -9.0  Std R = 4.6  Min R = -19.7
PolicyLoss: 2.03
Policy_Entropy: 0.234
Policy_KL: 0.00652
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps: 2.99e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.00113


Dynamics: Max Disturbance (m/s^2):  [0.00133134 0.00141043 0.00142314] 0.0024056412678452664
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0012   0.0046   5.5684   2.2087   2.1342
ADVA:  (18816,) (35015,) 0.537369698700557
ADV1:  0.0013188461215296954 0.0013034358571188706 0.00691164338312967 0.05550170355198952 -0.06591193716416115
ADVB:  (21854,) (35015,) 0.6241325146365844
ADV2:  0.29134173792195034 0.514645222633492 0.6226462639490655 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.3087   1.1777   4.6641  96.9288  38.5936  26.9699
***** Episode 80949, Mean R = -10.0  Std R = 6.0  Min R = -24.1
PolicyLoss

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7128   0.2836   1.5675  96.9288  38.5936  26.9699
***** Episode 81166, Mean R = -10.0  Std R = 6.4  Min R = -26.7
PolicyLoss: 2.03
Policy_Entropy: 0.234
Policy_KL: 0.00711
Policy_SD: 0.531
Steps: 1.18e+04
TotalSteps: 3e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.000459


Dynamics: Max Disturbance (m/s^2):  [0.00133134 0.00141043 0.00142314] 0.0024056412678452664
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0049   5.5684   2.2087   2.1342
ADVA:  (18636,) (35052,) 0.531667237247518
ADV1:  0.0007387778207953548 0.0005244611842887737 0.006764463328313426 0.06757933131467103 -0.06160007839424586
ADVB:  (21425,) (35052,) 0.6112347369622275
ADV2:  0.22299149553307815 0.43720568296272966 0.5685502262943122 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7443   0.2817   1.2624  96.9288  38.5936  26.9699
***** Episode 81197, Mean R = -9.4  Std R = 4.9  Min R = -22.3
PolicyLoss

ADVA:  (18702,) (35206,) 0.5312162699539851
ADV1:  0.0015477833307774502 0.0005846561673561628 0.00808414565719719 0.048332177576163315 -0.09382505521565937
ADVB:  (23955,) (35206,) 0.6804237913991933
ADV2:  0.318628832558155 0.4353782782375059 0.47650930858360524 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5759   0.2183   1.1142  96.9288  38.5936  26.9699
***** Episode 81414, Mean R = -10.7  Std R = 4.7  Min R = -21.9
PolicyLoss: 1.89
Policy_Entropy: 0.234
Policy_KL: 0.00679
Policy_SD: 0.547
Steps: 1.17e+04
TotalSteps: 3.01e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.000588


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0027   5.5684   2.2087   2.1342
ADVA:  (19030,) (35260,) 0.5397050482132728
ADV1:  0.0011864550844894722 0.00048122856127153657 0.007637922879129836 0.06123340996295201 -0.09382505521565937
ADVB:  (23572,) (35260,) 0.6685195689166193
ADV2:  0.29699534336751743 0.43811484560784814 0.499554453712943

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0026   5.5684   2.2087   2.1342
ADVA:  (18399,) (35194,) 0.5227879752230494
ADV1:  0.0007641359292302266 0.0006372499167761411 0.006625200221376998 0.05349697923069094 -0.09869614706769703
ADVB:  (21779,) (35194,) 0.6188270727964994
ADV2:  0.24850613312232409 0.4653243017475619 0.5669463399760162 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.4244   2.3091  10.9553  96.9288  38.5936  26.9699
***** Episode 81662, Mean R = -9.3  Std R = 4.2  Min R = -18.9
PolicyLoss: 2.22
Policy_Entropy: 0.234
Policy_KL: 0.00944
Policy_SD: 0.538
Steps: 1.17e+04
TotalSteps: 3.02e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000679


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0016   5.5684   2.2087   2.1342
ADVA:  (18528,) (35165,) 0.5268875302147021
ADV1:  0.0 -0.00030347458731064036 0.006788790066979396 0.05362045415548883 -0.09869614706769703
ADVB:  (19586,) (35165

cs_angles |  0.0042  0.0028 |  0.0739  0.0732 | -0.9755 -0.9946 |  0.9595  0.9596
optical_flow |  0.0000 -0.0000 |  0.0192  0.0191 | -1.0431 -1.0118 |  0.8838  0.9623
v_err    | -0.0110 |  0.0607 | -0.4556 |  0.1018
landing_rewards |    9.61 |    1.93 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.70 |    4.17 |  -34.38 |   -7.97
steps    |     380 |      21 |     339 |     423
***** Episode 81910, Mean R = -9.7  Std R = 5.0  Min R = -24.5
PolicyLoss: 1.45
Policy_Entropy: 0.234
Policy_KL: 0.00698
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 3.03e+07
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.000515


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0036   5.5684   2.2087   2.1342
ADVA:  (22707,) (35367,) 0.642039189074561
ADV1:  0.0001398746076514308 -0.0010809622847054775 0.008724777160285569 0.04057021304812053 -0.08463878664564617
ADVB:  (18441,) (35367,) 0.5214182712698278
ADV2:  0.

attitude |   -0.03   -0.10   -0.02 |    1.24    0.65    1.82 |   -3.14   -1.55   -3.14 |    3.14    1.54    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.10   -0.03 |    0.67    1.81 |   -1.49   -3.13 |    1.35    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.01 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.61
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.98   -0.93 |    0.95    0.99
cs_angles |  0.0020  0.0048 |  0.0727  0.0730 | -0.9752 -0.9317 |  0.9519  0.9929
optical_flow | -0.0000 -0.0001 |  0.0201  0.0186 | -0.9997 -0.9156 |  1.0847  1.1893
v_err    | -0.0106 |  0.0601 | -0.4524 |  0.1136
landing_rewards |    9.19 |    2.72 |    0.00 |   10.00
landing_margin |   -0

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0032   5.5684   2.2087   2.1342
ADVA:  (21290,) (34963,) 0.6089294396933902
ADV1:  0.0 -0.0008377188469213834 0.007754075482356838 0.05049569707074425 -0.056959078829890875
ADVB:  (16197,) (34963,) 0.46326116179961674
ADV2:  0.0 0.279580475495033 0.46917792220074617 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0153   0.4090   1.8796  96.9288  38.5936  26.9699
Update Cnt = 2660    ET =    837.4   Stats:  Mean, Std, Min, Max
r_f      |    3.30  -15.29   -3.63 |  176.36  171.36  206.91 | -391.00 -365.92 -390.28 |  391.88  387.09  702.02
v_f      |   -0.00    0.00   -0.00 |    0.05    0.07    0.05 |   -0.14   -0.09   -0.21 |    0.10    1.06    0.10
r_i      |   38.09  -22.05    3.99 |  672.65  666.21  762.34 |-1295.67-1335.33-1267.14 | 1334.04 1227.79 1326.10
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.08   -0.09 |    0.09    0.09    0.10
norm_rf  |    1.43 |   22.57 |    

Dynamics: Max Disturbance (m/s^2):  [0.00133134 0.00141043 0.00142314] 0.0024056412678452664
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0062   0.0036   0.0131   5.5684   2.2087   2.1342
ADVA:  (20401,) (35009,) 0.5827358679196778
ADV1:  0.0 -0.0005033321090419507 0.008111992713430122 0.049552882702156686 -0.05787103613875874
ADVB:  (17583,) (35009,) 0.5022422805564284
ADV2:  0.004699194785015803 0.2946786879568644 0.44855878798228116 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4289   0.1505   0.8344  96.9288  38.5936  26.9699
***** Episode 82778, Mean R = -10.8  Std R = 5.6  Min R = -25.1
PolicyLoss: 1.73
Policy_Entropy: 0.234
Policy_KL: 0.00546
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 3.06e+07
VF_0_ExplainedVarNew: 0.962
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 0.00106


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0020   0.0067   5.5684   2.2087   2.1342
ADVA:  (21002,) (34882,) 0.6020870362937905
ADV1:  0.000645978790641163 -6.401660043

***** Episode 82995, Mean R = -9.1  Std R = 5.2  Min R = -25.4
PolicyLoss: 1.69
Policy_Entropy: 0.234
Policy_KL: 0.0048
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 3.07e+07
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000403


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0010   0.0037   5.5684   2.2087   2.1342
ADVA:  (18496,) (35082,) 0.5272219371757596
ADV1:  0.001698401235562288 0.0010271789494569446 0.006946008617477101 0.0787837108888596 -0.06771939937451216
ADVB:  (25317,) (35082,) 0.7216521292970755
ADV2:  0.398194745963382 0.493076187826359 0.5073270694758472 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2554   0.0947   0.4733  96.9288  38.5936  26.9699
***** Episode 83026, Mean R = -9.7  Std R = 2.8  Min R = -14.6
PolicyLoss: 2.01
Policy_Entropy: 0.234
Policy_KL: 0.00389
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 3.07e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.000227


Dynamics: Max Distu

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3110   0.1163   0.6535  96.9288  38.5936  26.9699
***** Episode 83243, Mean R = -10.6  Std R = 5.7  Min R = -24.5
PolicyLoss: 1.31
Policy_Entropy: 0.233
Policy_KL: 0.00643
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 3.08e+07
VF_0_ExplainedVarNew: 0.958
VF_0_ExplainedVarOld: 0.952
VF_0_Loss : 0.000328


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0041   0.0027   0.0093   5.5684   2.2087   2.1342
ADVA:  (20170,) (35296,) 0.5714528558476881
ADV1:  0.0026346948516745025 0.0011652177878851647 0.009398339577447052 0.061132945053733845 -0.11314599343816917
ADVB:  (25683,) (35296,) 0.7276461922030825
ADV2:  0.388867774337637 0.4649861841961409 0.4653192651041308 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8855   0.3702   1.9847  96.9288  38.5936  26.9699
***** Episode 83274, Mean R = -8.2  Std R = 4.0  Min R = -18.1
PolicyLoss: 1.89
Policy_Entropy: 0.234
Policy_KL: 0.00502
Policy_SD: 0.542
Steps: 1.16e+04
TotalStep

ADVA:  (23668,) (35343,) 0.6696658461364343
ADV1:  0.0009893469830836825 -0.00026268143895683406 0.009032834645043388 0.03409160186847055 -0.0705730357085767
ADVB:  (21035,) (35343,) 0.5951673598732422
ADV2:  0.14587672355183195 0.30526286709981526 0.386772679557325 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9865   0.4215   2.1666  96.9288  38.5936  26.9699
***** Episode 83491, Mean R = -11.6  Std R = 6.6  Min R = -27.1
PolicyLoss: 1.51
Policy_Entropy: 0.234
Policy_KL: 0.00807
Policy_SD: 0.549
Steps: 1.18e+04
TotalSteps: 3.09e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.000451


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0010   5.5684   2.2087   2.1342
ADVA:  (23146,) (35416,) 0.653546419697312
ADV1:  0.0007607894850627153 -0.0005814019017833023 0.00925468882466802 0.05001744462857338 -0.0705730357085767
ADVB:  (20905,) (35416,) 0.5902699344928846
ADV2:  0.13397339174664652 0.29499918920835233 0.3829702138400778

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.07    0.12 |    0.68    1.83 |   -1.51   -3.13 |    1.45    3.14
w_f      |    0.00    0.00    0.01 |    0.01    0.01    0.01 |   -0.03   -0.04   -0.01 |    0.03    0.03    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.40
seeker_angles |    0.00    0.01 |    0.08    0.08 |   -0.98   -0.98 |    0.98    0.99
cs_angles |  0.0021  0.0053 |  0.0760  0.0777 | -0.9812 -0.9815 |  0.9834  0.9936
optical_flow |  0.0001 -0.0001 |  0.0205  0.0192 | -1.1075 -1.0763 |  0.9962  1.1142
v_err    | -0.0110 |  0.0606 | -0.4649 |  0.1061
landing_rewards |    9.10 |    2.87 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.98 |    4.16 |  -36.39 |   -8.30
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5811   0.2703   1.1387  96.9288  38.5936  26.9699
Update Cnt = 2710    ET =    879.0   Stats:  Mean, Std, Min, Max
r_f      |   -8.43   -6.59  -12.34 |  198.42  168.72  193.35 | -384.75 -390.68 -395.48 |  395.54  376.27  396.81
v_f      |    0.00    0.00    0.00 |    0.05    0.05    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.10    0.10
r_i      |   -5.07  -49.22  -34.54 |  724.05  662.76  726.78 |-1235.76-1239.53-1251.97 | 1371.63 1335.05 1257.46
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.40
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f     |    1.24 |    2.44 |    0.00 |   32.52
thrust   |    0.00   -0.00    0.00 |    0.68    0.68    0.66 |   -3.44   -3.43   -3.44 |    3.42    3.43    3.45
norm_thrust |    0.91 |    0.73 |    0.00 |    3.46
fuel     |    1.52 |    0.17 |    1.05 |    2.00
rewards  |   -9.84 

ADVA:  (21933,) (35181,) 0.6234331030954208
ADV1:  0.001700928282466374 0.0003452591425455506 0.009119942510510763 0.03762623439265761 -0.06280038413743343
ADVB:  (22459,) (35181,) 0.6383843551917228
ADV2:  0.22500526511208027 0.36806018583499095 0.42709511759082475 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9084   0.3384   1.4331  96.9288  38.5936  26.9699
***** Episode 84328, Mean R = -10.0  Std R = 4.1  Min R = -21.1
PolicyLoss: 1.7
Policy_Entropy: 0.234
Policy_KL: 0.00609
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 3.12e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000387


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0027   5.5684   2.2087   2.1342
ADVA:  (20316,) (35114,) 0.5785726490858347
ADV1:  0.001339379515872461 0.0008931960715666727 0.007744258919270711 0.04641010936057832 -0.06280038413743343
ADVB:  (21988,) (35114,) 0.6261889844506464
ADV2:  0.22957143575448274 0.4223680006214133 0.5075248754704436 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0052   0.0032   0.0121   5.5684   2.2087   2.1342
ADVA:  (18897,) (35326,) 0.5349317782936082
ADV1:  0.0019113116997975192 0.0003760449919394195 0.00948272048717968 0.04709591928147738 -0.058492100537440514
ADVB:  (23675,) (35326,) 0.6701862650738832
ADV2:  0.3194753833710171 0.4252617884439314 0.4533782968328402 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5312   0.6673   3.2315  96.9288  38.5936  26.9699
***** Episode 84576, Mean R = -8.9  Std R = 4.3  Min R = -21.1
PolicyLoss: 1.88
Policy_Entropy: 0.235
Policy_KL: 0.00504
Policy_SD: 0.553
Steps: 1.17e+04
TotalSteps: 3.13e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00032


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0043   0.0025   0.0090   5.5684   2.2087   2.1342
ADVA:  (19161,) (35363,) 0.5418375137855951
ADV1:  0.0009655716276860994 0.00015330038066673645 0.008727504685541926 0.04709591928147738 -0.07287826846130419
ADVB:  (

***** Episode 84793, Mean R = -9.9  Std R = 4.5  Min R = -18.1
PolicyLoss: 2.02
Policy_Entropy: 0.235
Policy_KL: 0.00574
Policy_SD: 0.533
Steps: 1.17e+04
TotalSteps: 3.14e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.000389


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0012   5.5684   2.2087   2.1342
ADVA:  (18045,) (35247,) 0.5119584645501745
ADV1:  0.0 -0.00013881411797759112 0.0047617579926742285 0.03680360889722273 -0.056610207956439895
ADVB:  (17492,) (35247,) 0.49626918603001674
ADV2:  0.0 0.3705520219549211 0.5761410033492403 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2066   0.4308   1.9909  96.9288  38.5936  26.9699
***** Episode 84824, Mean R = -9.4  Std R = 4.6  Min R = -20.0
PolicyLoss: 2.17
Policy_Entropy: 0.235
Policy_KL: 0.00826
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 3.14e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.000309


ValFun  Gradients: u/sd/Max/C Max/Max u/Max

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6386   0.2708   1.2223  96.9288  38.5936  26.9699
***** Episode 85041, Mean R = -10.0  Std R = 4.5  Min R = -24.4
PolicyLoss: 1.68
Policy_Entropy: 0.235
Policy_KL: 0.00616
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 3.15e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.000264


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0046   5.5684   2.2087   2.1342
ADVA:  (19552,) (35560,) 0.5498312710911136
ADV1:  0.00045239240634146766 0.00013071004880652647 0.006858806693003651 0.04833714763572994 -0.10720377231223122
ADVB:  (20651,) (35560,) 0.5807367829021373
ADV2:  0.1471765458485233 0.3815783257109715 0.5180081166060285 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3976   0.1733   0.8587  96.9288  38.5936  26.9699
***** Episode 85072, Mean R = -9.7  Std R = 5.2  Min R = -29.3
PolicyLoss: 1.93
Policy_Entropy: 0.235
Policy_KL: 0.00664
Policy_SD: 0.543
Steps: 1.2e+04
TotalSte

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02   -0.06 |    0.66    1.88 |   -1.38   -3.09 |    1.47    3.14
w_f      |    0.00    0.00    0.01 |    0.01    0.01    0.01 |   -0.04   -0.03   -0.01 |    0.03    0.03    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.72
seeker_angles |    0.00    0.00 |    0.08    0.07 |   -0.99   -0.99 |    0.92    0.95
cs_angles |  0.0033  0.0029 |  0.0766  0.0744 | -0.9890 -0.9924 |  0.9200  0.9464
optical_flow | -0.0001 -0.0001 |  0.0196  0.0188 | -1.1452 -0.9799 |  1.0875  0.9551
v_err    | -0.0108 |  0.0596 | -0.4547 |  0.1025
landing_rewards |    9.35 |    2.46 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -14.83 |    4.06 |  -32.86 |   -7.93
steps    |     380 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9712   0.4384   1.8455  96.9288  38.5936  26.9699
Update Cnt = 2760    ET =   1449.1   Stats:  Mean, Std, Min, Max
r_f      |   -6.49    7.33   11.70 |  181.50  165.14  205.03 | -378.53 -382.16 -393.54 |  377.93  366.20  389.08
v_f      |    0.00   -0.00   -0.01 |    0.04    0.04    0.05 |   -0.11   -0.10   -0.11 |    0.11    0.09    0.09
r_i      |  -33.10   23.87   80.12 |  693.32  622.01  773.19 |-1268.30-1289.57-1274.46 | 1299.46 1340.41 1370.09
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.14 |    0.06 |    0.03 |    0.33
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.59 |    4.46 |    0.01 |   66.48
thrust   |    0.00    0.01   -0.00 |    0.67    0.67    0.66 |   -3.44   -3.44   -3.34 |    3.41    3.40    3.30
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.50 |    0.17 |    1.11 |    2.01
rewards  |   -9.38 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3924   0.6252   2.7620  96.9288  38.5936  26.9699
***** Episode 85878, Mean R = -10.8  Std R = 5.3  Min R = -23.0
PolicyLoss: 1.67
Policy_Entropy: 0.236
Policy_KL: 0.00733
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 3.18e+07
VF_0_ExplainedVarNew: 0.963
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.000651


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0049   0.0026   0.0107   5.5684   2.2087   2.1342
ADVA:  (20349,) (35342,) 0.5757738667873917
ADV1:  0.0007543603918384809 0.00015325658242435096 0.007731260130425306 0.06101102215742371 -0.0588361143680457
ADVB:  (20763,) (35342,) 0.587487974647728
ADV2:  0.16498558652553935 0.3809619511880478 0.4927229078731066 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3436   0.4729   2.0880  96.9288  38.5936  26.9699
***** Episode 85909, Mean R = -9.3  Std R = 4.3  Min R = -20.0
PolicyLoss: 1.9
Policy_Entropy: 0.236
Policy_KL: 0.00773
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps

ADVA:  (20041,) (35469,) 0.5650286165383857
ADV1:  0.0013266035139259183 0.0009427480499403972 0.006932128795471057 0.06026670550285662 -0.11464151751313811
ADVB:  (22595,) (35469,) 0.6370351574614452
ADV2:  0.25949827951590393 0.4324288968608105 0.5247116872943004 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  11.8554   6.8302  27.8070  96.9288  38.5936  26.9699
***** Episode 86126, Mean R = -10.9  Std R = 7.2  Min R = -36.4
PolicyLoss: 1.99
Policy_Entropy: 0.235
Policy_KL: 0.0236
Policy_SD: 0.546
Steps: 1.19e+04
TotalSteps: 3.19e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.00049


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0005   0.0023   5.5684   2.2087   2.1342
ADVA:  (21672,) (35281,) 0.6142683030526346
ADV1:  0.0008558705257901162 0.0003056614653673763 0.008026131759942927 0.0519651673756108 -0.11464151751313811
ADVB:  (20130,) (35281,) 0.5705620588985573
ADV2:  0.11443624301872668 0.33243712577821327 0.47945379455337284 3

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0014   0.0053   5.5684   2.2087   2.1342
ADVA:  (20045,) (35082,) 0.5713756342283792
ADV1:  0.0003121211112166143 -0.00021798260968906562 0.008012559834427902 0.050482358641749214 -0.11630478028549784
ADVB:  (19939,) (35082,) 0.5683541417251012
ADV2:  0.11149795102174148 0.3284656547028995 0.4477231370187297 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6618   0.2391   1.0737  96.9288  38.5936  26.9699
***** Episode 86374, Mean R = -8.7  Std R = 5.3  Min R = -21.1
PolicyLoss: 1.68
Policy_Entropy: 0.237
Policy_KL: 0.0087
Policy_SD: 0.541
Steps: 1.15e+04
TotalSteps: 3.2e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.961
VF_0_Loss : 0.000426


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0012   5.5684   2.2087   2.1342
ADVA:  (20814,) (35024,) 0.5942782092279579
ADV1:  0.001123967478273838 0.0005254758932648259 0.007924129409232226 0.056438183763579175 -0.10083745754543394
ADVB:

***** Episode 86591, Mean R = -10.1  Std R = 5.6  Min R = -23.7
PolicyLoss: 1.56
Policy_Entropy: 0.237
Policy_KL: 0.00644
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 3.21e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.000633


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0021   5.5684   2.2087   2.1342
ADVA:  (21677,) (35000,) 0.6193428571428572
ADV1:  0.0011649343517808 -5.9039878482707e-05 0.007991309304192479 0.04713654300985054 -0.06226103707755853
ADVB:  (22691,) (35000,) 0.6483142857142857
ADV2:  0.22325675862963468 0.33612394832581965 0.38225622283742267 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3706   0.1551   0.7625  96.9288  38.5936  26.9699
***** Episode 86622, Mean R = -8.3  Std R = 4.4  Min R = -19.5
PolicyLoss: 1.51
Policy_Entropy: 0.237
Policy_KL: 0.00635
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 3.21e+07
VF_0_ExplainedVarNew: 0.968
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.000464


ValFun  Gradi

seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.98   -0.98 |    0.89    0.94
cs_angles |  0.0042  0.0033 |  0.0727  0.0752 | -0.9779 -0.9784 |  0.8948  0.9442
optical_flow |  0.0000  0.0001 |  0.0178  0.0181 | -1.0571 -0.9858 |  0.9309  0.9485
v_err    | -0.0110 |  0.0601 | -0.4536 |  0.1041
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -14.63 |    3.76 |  -27.32 |   -8.53
steps    |     378 |      19 |     338 |     418
***** Episode 86870, Mean R = -9.0  Std R = 4.3  Min R = -17.8
PolicyLoss: 2.04
Policy_Entropy: 0.237
Policy_KL: 0.007
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 3.22e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.001


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0012   0.0045   5.5684   2.2087   2.1342
ADVA:  (21136,) (34819,) 0.6070249001981677
ADV1:  0.0013067669725425283 0.0008990162436899445 0.00674324106554259 0.040117304184

attitude |    0.06   -0.02   -0.01 |    1.10    0.62    1.79 |   -3.14   -1.54   -3.14 |    3.14    1.57    3.14
w        |   -0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.02   -0.02 |    0.62    1.79 |   -1.47   -3.13 |    1.53    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.03   -0.02 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.40
seeker_angles |    0.00    0.00 |    0.08    0.07 |   -0.99   -1.00 |    0.98    0.97
cs_angles |  0.0038  0.0037 |  0.0784  0.0737 | -0.9898 -0.9971 |  0.9790  0.9710
optical_flow |  0.0000  0.0000 |  0.0193  0.0193 | -1.0984 -1.0598 |  0.9845  0.9639
v_err    | -0.0110 |  0.0597 | -0.4529 |  0.1073
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5651   0.1693   0.8996  96.9288  38.5936  26.9699
Update Cnt = 2820    ET =   1732.8   Stats:  Mean, Std, Min, Max
r_f      |  -15.07    7.23    2.46 |  181.91  169.71  199.58 | -365.88 -372.40 -393.94 |  380.54  383.08  374.78
v_f      |    0.00   -0.00   -0.00 |    0.04    0.05    0.05 |   -0.10   -0.08   -0.10 |    0.09    0.10    0.10
r_i      |  -63.64    3.41   18.03 |  665.34  677.94  764.13 |-1275.56-1302.08-1306.89 | 1357.50 1332.85 1264.13
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.10    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.43
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.26 |    1.87 |    0.01 |   17.00
thrust   |    0.00    0.00   -0.00 |    0.66    0.68    0.66 |   -3.33   -3.40   -3.38 |    3.28    3.36    3.39
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.49 |    0.17 |    1.07 |    2.08
rewards  |   -9.39 

ADVA:  (21463,) (35492,) 0.6047278259889552
ADV1:  0.0006995499804660094 2.6236977219437888e-05 0.008022986104108735 0.060427908869387925 -0.07290221952810111
ADVB:  (20818,) (35492,) 0.5865547165558436
ADV2:  0.13445213862024866 0.34334369588423286 0.46894697217577397 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7315   0.3738   1.4960  96.9288  38.5936  26.9699
***** Episode 87738, Mean R = -9.3  Std R = 4.0  Min R = -16.5
PolicyLoss: 1.7
Policy_Entropy: 0.238
Policy_KL: 0.00895
Policy_SD: 0.534
Steps: 1.19e+04
TotalSteps: 3.25e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.000272


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0021   0.0080   5.5684   2.2087   2.1342
ADVA:  (20465,) (35536,) 0.5758948671769473
ADV1:  0.0018145060899449183 0.0013624171139710799 0.006982312066320535 0.060427908869387925 -0.07290221952810111
ADVB:  (23621,) (35536,) 0.6647062134173796
ADV2:  0.2973211402161301 0.47038446130271316 0.54398576129690

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0009   5.5684   2.2087   2.1342
ADVA:  (21725,) (35102,) 0.6189106033844225
ADV1:  0.0007523093057616136 -0.000329395195636596 0.008414631517660372 0.0415003818902816 -0.08513860720095584
ADVB:  (20920,) (35102,) 0.5959774371830665
ADV2:  0.14534620300094242 0.31267304444449334 0.41943237474673617 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4483   0.1774   0.8252  96.9288  38.5936  26.9699
***** Episode 87986, Mean R = -12.4  Std R = 5.4  Min R = -27.3
PolicyLoss: 1.53
Policy_Entropy: 0.238
Policy_KL: 0.00561
Policy_SD: 0.538
Steps: 1.17e+04
TotalSteps: 3.26e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.000352


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0015   0.0056   5.5684   2.2087   2.1342
ADVA:  (20026,) (34811,) 0.5752779293901353
ADV1:  0.001734846950686826 0.000789786148655711 0.007627252621116896 0.0415003818902816 -0.08566012203840362
ADVB:  

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0049   0.0028   0.0102   5.5684   2.2087   2.1342
ADVA:  (19338,) (35300,) 0.5478186968838527
ADV1:  0.0029686925011781114 0.001885483802626491 0.008554077382575943 0.03894033706417532 -0.06300229218110037
ADVB:  (26641,) (35300,) 0.7547025495750708
ADV2:  0.5036877763871987 0.5763032568489302 0.5297119283887534 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4515   0.1813   0.7549  96.9288  38.5936  26.9699
***** Episode 88234, Mean R = -9.3  Std R = 4.1  Min R = -19.1
PolicyLoss: 2.23
Policy_Entropy: 0.238
Policy_KL: 0.00587
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 3.27e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.000347


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0019   0.0078   5.5684   2.2087   2.1342
ADVA:  (18867,) (35276,) 0.5348395509694976
ADV1:  0.0011269054582947972 0.0006593493632926249 0.008035267045612266 0.044553801649611535 -0.06300229218110037
ADVB:  

***** Episode 88451, Mean R = -10.1  Std R = 4.8  Min R = -26.6
PolicyLoss: 1.64
Policy_Entropy: 0.237
Policy_KL: 0.00664
Policy_SD: 0.538
Steps: 1.17e+04
TotalSteps: 3.28e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.000231


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0016   0.0061   5.5684   2.2087   2.1342
ADVA:  (22053,) (35278,) 0.6251204716820682
ADV1:  0.0008021688620831894 0.00012959915734989583 0.007781232234141326 0.04446761139670477 -0.06337093454960863
ADVB:  (19897,) (35278,) 0.5640058960258518
ADV2:  0.11319721614085085 0.31929904020403616 0.441084236974955 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6158   0.2378   1.2005  96.9288  38.5936  26.9699
***** Episode 88482, Mean R = -9.3  Std R = 4.8  Min R = -22.8
PolicyLoss: 1.64
Policy_Entropy: 0.237
Policy_KL: 0.00836
Policy_SD: 0.541
Steps: 1.18e+04
TotalSteps: 3.28e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.00041


ValFun  Gra

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.04    0.04 |    0.68    1.90 |   -1.45   -3.12 |    1.43    3.11
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.04   -0.01 |    0.03    0.04    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.43
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -1.00 |    0.99    0.99
cs_angles |  0.0038  0.0036 |  0.0767  0.0760 | -0.9908 -0.9977 |  0.9915  0.9881
optical_flow | -0.0001 -0.0001 |  0.0197  0.0188 | -1.0608 -1.0591 |  1.0619  1.0918
v_err    | -0.0107 |  0.0598 | -0.4521 |  0.1043
landing_rewards |    9.16 |    2.77 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -14.94 |    4.02 |  -30.78 |   -8.73
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.9289   1.2154   5.6863  96.9288  38.5936  26.9699
Update Cnt = 2870    ET =   1444.0   Stats:  Mean, Std, Min, Max
r_f      |  -12.15   11.86   33.26 |  177.97  174.27  209.87 | -386.33 -375.17 -387.31 |  386.67  388.18  394.35
v_f      |    0.00   -0.00   -0.01 |    0.05    0.05    0.05 |   -0.11   -0.10   -0.10 |    0.10    0.09    0.10
r_i      |  -55.25   61.54   97.96 |  678.88  682.32  752.87 |-1348.98-1281.59-1274.74 | 1307.99 1299.44 1321.03
v_i      |    0.00   -0.00   -0.01 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.15 |    0.06 |    0.02 |    0.35
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f     |    1.14 |    1.28 |    0.01 |    8.90
thrust   |    0.01    0.00    0.00 |    0.66    0.68    0.66 |   -3.32   -3.39   -3.44 |    3.35    3.44    3.28
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.49 |    0.19 |    1.04 |    2.27
rewards  |   -9.92 

ADVA:  (24362,) (35333,) 0.6894970707270823
ADV1:  0.0016782016968867114 0.0003293904308888469 0.009241632522689903 0.03376700979898922 -0.06758225140807195
ADVB:  (20854,) (35333,) 0.5902131152180681
ADV2:  0.15088743363004753 0.31812815362962465 0.4084777991208864 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7686   0.3374   1.4285  96.9288  38.5936  26.9699
***** Episode 89288, Mean R = -10.2  Std R = 5.4  Min R = -22.7
PolicyLoss: 1.57
Policy_Entropy: 0.239
Policy_KL: 0.00501
Policy_SD: 0.524
Steps: 1.17e+04
TotalSteps: 3.31e+07
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.957
VF_0_Loss : 0.00554


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0030   5.5684   2.2087   2.1342
ADVA:  (24029,) (34861,) 0.6892802845586758
ADV1:  0.001990680537988343 0.0006171484092012406 0.009354935006609357 0.06395264856435484 -0.06415733568741758
ADVB:  (20374,) (34861,) 0.5844353288775422
ADV2:  0.15297218198012538 0.329932192520324 0.4228354235582726 3

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0035   5.5684   2.2087   2.1342
ADVA:  (23180,) (35356,) 0.6556171512614549
ADV1:  0.002586805470308897 0.0009024008966297684 0.009003621107813801 0.05934025080650762 -0.07001920264899424
ADVB:  (25156,) (35356,) 0.711505826450956
ADV2:  0.3235806512401575 0.3925124935955413 0.4054984252360572 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4970   0.1700   0.8243  96.9288  38.5936  26.9699
***** Episode 89536, Mean R = -10.8  Std R = 6.0  Min R = -28.1
PolicyLoss: 1.61
Policy_Entropy: 0.239
Policy_KL: 0.00604
Policy_SD: 0.529
Steps: 1.17e+04
TotalSteps: 3.32e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.00331


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0022   5.5684   2.2087   2.1342
ADVA:  (22247,) (35145,) 0.6330061175131597
ADV1:  0.0021395449056711147 0.0008864891420362588 0.00837225136659629 0.05934025080650762 -0.06371931265367795
ADVB:  (24

***** Episode 89753, Mean R = -10.8  Std R = 5.0  Min R = -26.0
PolicyLoss: 1.71
Policy_Entropy: 0.238
Policy_KL: 0.00671
Policy_SD: 0.533
Steps: 1.17e+04
TotalSteps: 3.33e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.000404


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0044   0.0026   0.0097   5.5684   2.2087   2.1342
ADVA:  (20995,) (35008,) 0.5997200639853748
ADV1:  0.0004899779684271743 -0.0005041697378062457 0.009333435304984207 0.05662434623835144 -0.07447295060426982
ADVB:  (19604,) (35008,) 0.5599862888482633
ADV2:  0.09721081256728933 0.3261997263285139 0.4584066397741422 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5646   0.2318   1.0591  96.9288  38.5936  26.9699
***** Episode 89784, Mean R = -11.0  Std R = 6.2  Min R = -28.4
PolicyLoss: 1.69
Policy_Entropy: 0.238
Policy_KL: 0.00698
Policy_SD: 0.532
Steps: 1.15e+04
TotalSteps: 3.33e+07
VF_0_ExplainedVarNew: 0.951
VF_0_ExplainedVarOld: 0.945
VF_0_Loss : 0.000544


ValFun  G

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6940   0.3048   1.3884  96.9288  38.5936  26.9699
***** Episode 90001, Mean R = -8.1  Std R = 4.0  Min R = -19.5
PolicyLoss: 1.93
Policy_Entropy: 0.238
Policy_KL: 0.00637
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 3.33e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000315


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0015   5.5684   2.2087   2.1342
ADVA:  (21891,) (35042,) 0.6247074938645054
ADV1:  0.0015670993486119594 0.0010115854732688945 0.006487742603351795 0.04865827159133301 -0.059257609390544204
ADVB:  (22070,) (35042,) 0.6298156497916786
ADV2:  0.21772989112654323 0.3836200360697165 0.48298918008764313 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5244   0.1980   0.9380  96.9288  38.5936  26.9699
***** Episode 90032, Mean R = -10.8  Std R = 5.0  Min R = -21.9
PolicyLoss: 1.77
Policy_Entropy: 0.238
Policy_KL: 0.00609
Policy_SD: 0.535
Steps: 1.18e+04
TotalS

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.06    0.14 |    0.63    1.94 |   -1.46   -3.14 |    1.46    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.01 |    0.02    0.04    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.26 |    0.00 |    1.34
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -1.00   -0.97 |    0.93    0.98
cs_angles |  0.0025  0.0037 |  0.0752  0.0759 | -0.9962 -0.9702 |  0.9292  0.9791
optical_flow | -0.0001 -0.0001 |  0.0197  0.0176 | -1.0633 -1.0080 |  1.0303  1.0595
v_err    | -0.0104 |  0.0597 | -0.4534 |  0.1060
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -14.75 |    4.52 |  -38.93 |   -7.12
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.1235   2.3512   9.8086  96.9288  38.5936  26.9699
Update Cnt = 2920    ET =   1403.6   Stats:  Mean, Std, Min, Max
r_f      |   -3.97    1.82  -15.01 |  186.45  174.33  197.89 | -387.37 -381.42 -392.88 |  375.99  373.58  390.68
v_f      |    0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.12 |    0.10    0.11    0.10
r_i      |    0.07  -18.54  -43.09 |  707.69  670.28  729.45 |-1310.03-1346.43-1325.42 | 1328.90 1348.36 1250.09
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.04 |   -0.09   -0.10   -0.10 |    0.09    0.10    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.35
norm_vf  |    0.08 |    0.01 |    0.02 |    0.12
gs_f     |    1.09 |    1.40 |    0.01 |   11.23
thrust   |   -0.00   -0.01    0.00 |    0.66    0.68    0.66 |   -3.39   -3.45   -3.32 |    3.38    3.44    3.46
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.47 |    0.17 |    1.11 |    2.05
rewards  |   -9.42 

ADVA:  (22077,) (35253,) 0.6262445749297932
ADV1:  0.00028518682781194116 -0.0006303837364846314 0.007701619563694918 0.04671790473957954 -0.06115585884067931
ADVB:  (19658,) (35253,) 0.5576263013076901
ADV2:  0.08200538769929938 0.2723270643230884 0.3858361883660447 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7429   0.3614   1.4146  96.9288  38.5936  26.9699
***** Episode 90838, Mean R = -9.3  Std R = 5.1  Min R = -21.6
PolicyLoss: 1.41
Policy_Entropy: 0.238
Policy_KL: 0.00724
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 3.37e+07
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.000336


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   5.5684   2.2087   2.1342
ADVA:  (22878,) (35174,) 0.6504236083470746
ADV1:  0.0003981019411056245 -0.0007580897745471972 0.008722731981164342 0.04671790473957954 -0.05259827474098082
ADVB:  (19646,) (35174,) 0.5585375561494286
ADV2:  0.08381432939890326 0.26941178625527823 0.3726735061416

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0023   5.5684   2.2087   2.1342
ADVA:  (20295,) (34600,) 0.5865606936416184
ADV1:  0.0001399660666221228 -0.00021078600109881047 0.009295208836709312 0.10050206819354862 -0.07981204090896632
ADVB:  (18690,) (34600,) 0.5401734104046243
ADV2:  0.06513342551979817 0.3253158953720673 0.49897141659909017 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6114   0.1868   1.0150  96.9288  38.5936  26.9699
***** Episode 91086, Mean R = -9.1  Std R = 5.2  Min R = -20.8
PolicyLoss: 1.76
Policy_Entropy: 0.238
Policy_KL: 0.00958
Policy_SD: 0.539
Steps: 1.16e+04
TotalSteps: 3.38e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.000966


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0011   0.0040   5.5684   2.2087   2.1342
ADVA:  (20199,) (34789,) 0.5806145620742189
ADV1:  0.0 -0.0008962824444749393 0.008156295157424729 0.10050206819354862 -0.07981204090896632
ADVB:  (18036,) (347

***** Episode 91303, Mean R = -9.5  Std R = 5.6  Min R = -22.5
PolicyLoss: 1.69
Policy_Entropy: 0.239
Policy_KL: 0.00824
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 3.38e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.000291


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0016   5.5684   2.2087   2.1342
ADVA:  (20144,) (34953,) 0.5763167682316254
ADV1:  0.0005978782177553724 0.00015816714841545112 0.006697058857016363 0.053152805920373414 -0.053073899813172176
ADVB:  (20748,) (34953,) 0.5935971161273711
ADV2:  0.16075401766747688 0.3562618323481542 0.47417648372264254 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3027   0.0986   0.4893  96.9288  38.5936  26.9699
***** Episode 91334, Mean R = -9.2  Std R = 4.7  Min R = -22.8
PolicyLoss: 1.73
Policy_Entropy: 0.239
Policy_KL: 0.0084
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 3.38e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.000372


ValFun  G

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5755   0.2165   1.0095  96.9288  38.5936  26.9699
***** Episode 91551, Mean R = -10.1  Std R = 5.0  Min R = -20.8
PolicyLoss: 1.58
Policy_Entropy: 0.238
Policy_KL: 0.0109
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 3.39e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000688


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0025   5.5684   2.2087   2.1342
ADVA:  (21061,) (34782,) 0.6055143465010637
ADV1:  0.0008363201400621318 0.00016818189101140116 0.007075878025011372 0.04833223276656107 -0.0688831084535978
ADVB:  (20454,) (34782,) 0.5880627910988442
ADV2:  0.13932351129215112 0.3204618736114507 0.43909809393358445 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8330   0.3139   1.4835  96.9288  38.5936  26.9699
***** Episode 91582, Mean R = -8.7  Std R = 4.7  Min R = -22.4
PolicyLoss: 1.58
Policy_Entropy: 0.239
Policy_KL: 0.00935
Policy_SD: 0.54
Steps: 1.16e+04
TotalStep

attitude |    0.02    0.00    0.02 |    1.26    0.66    1.91 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |   -0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.00    0.09 |    0.65    1.91 |   -1.50   -3.11 |    1.48    3.12
w_f      |    0.00    0.00    0.01 |    0.01    0.01    0.01 |   -0.04   -0.03   -0.01 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.49
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.98   -0.99 |    0.99    0.93
cs_angles |  0.0002  0.0010 |  0.0743  0.0779 | -0.9846 -0.9949 |  0.9926  0.9258
optical_flow | -0.0002  0.0000 |  0.0196  0.0183 | -1.1171 -0.9713 |  1.0775  0.9328
v_err    | -0.0103 |  0.0594 | -0.4546 |  0.1060
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4360   0.1962   0.9679  96.9288  38.5936  26.9699
Update Cnt = 2970    ET =   1575.3   Stats:  Mean, Std, Min, Max
r_f      |    9.04    7.52   -8.49 |  192.20  164.02  185.83 | -383.20 -344.63 -383.16 |  393.73  374.03  390.02
v_f      |    0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.10    0.09    0.11
r_i      |   13.06   30.81  -48.80 |  719.95  656.28  719.24 |-1319.71-1281.77-1292.38 | 1347.19 1269.13 1306.66
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.10    0.10
norm_rf  |    0.14 |    0.06 |    0.03 |    0.36
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.46 |    3.30 |    0.00 |   34.44
thrust   |    0.00    0.00   -0.00 |    0.66    0.68    0.67 |   -3.37   -3.46   -3.45 |    3.44    3.44    3.37
norm_thrust |    0.91 |    0.72 |    0.00 |    3.46
fuel     |    1.48 |    0.19 |    1.05 |    2.18
rewards  |   -9.21 

ADVA:  (19048,) (35060,) 0.5432972047917856
ADV1:  0.0 -0.0005007144320988301 0.006574140500202516 0.0640317077364067 -0.06152589332955966
ADVB:  (18716,) (35060,) 0.5338277239018825
ADV2:  0.05888386381525685 0.3174025735119107 0.4779618000268182 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.7462   2.9940  10.9362  96.9288  38.5936  26.9699
***** Episode 92388, Mean R = -10.4  Std R = 5.4  Min R = -30.2
PolicyLoss: 1.72
Policy_Entropy: 0.239
Policy_KL: 0.00991
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 3.42e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.00265


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0029   5.5684   2.2087   2.1342
ADVA:  (19516,) (35018,) 0.5573133816894169
ADV1:  0.0 -0.000709648980635076 0.0068701774930576935 0.06011013780366026 -0.06152589332955966
ADVB:  (18657,) (35018,) 0.5327831400993774
ADV2:  0.05537566616150892 0.3100142257826451 0.4601922878679308 3.0 0.0
Policy  Gradients: u/sd/Max

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0012   0.0045   5.5684   2.2087   2.1342
ADVA:  (19184,) (34996,) 0.5481769345068008
ADV1:  0.0011523764033189521 0.0005483354059797412 0.006916575980748189 0.05095885899803243 -0.07720922939510233
ADVB:  (22142,) (34996,) 0.6327008801005829
ADV2:  0.2555309619317305 0.4202201255512325 0.5123860067071083 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9801   0.5278   2.1358  96.9288  38.5936  26.9699
***** Episode 92636, Mean R = -10.2  Std R = 5.8  Min R = -26.5
PolicyLoss: 1.92
Policy_Entropy: 0.24
Policy_KL: 0.00519
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 3.43e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.00221


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0034   5.5684   2.2087   2.1342
ADVA:  (18822,) (35232,) 0.5342302452316077
ADV1:  0.0008472230774802846 0.0005468211399027561 0.006387564059130387 0.053012605876720575 -0.07720922939510233
ADVB:  

***** Episode 92853, Mean R = -9.1  Std R = 4.4  Min R = -20.5
PolicyLoss: 1.7
Policy_Entropy: 0.239
Policy_KL: 0.00449
Policy_SD: 0.531
Steps: 1.18e+04
TotalSteps: 3.44e+07
VF_0_ExplainedVarNew: 0.955
VF_0_ExplainedVarOld: 0.953
VF_0_Loss : 0.000672


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0015   5.5684   2.2087   2.1342
ADVA:  (22748,) (35182,) 0.6465806378261612
ADV1:  0.0011107023359304604 3.135289032217924e-05 0.008183037754665603 0.03444369202138048 -0.06588795829440132
ADVB:  (21421,) (35182,) 0.6088624864987778
ADV2:  0.15505060735759293 0.30546806320866676 0.3897653777562626 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8511   0.4355   1.7527  96.9288  38.5936  26.9699
***** Episode 92884, Mean R = -9.4  Std R = 5.2  Min R = -24.4
PolicyLoss: 1.45
Policy_Entropy: 0.239
Policy_KL: 0.00523
Policy_SD: 0.527
Steps: 1.18e+04
TotalSteps: 3.44e+07
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.000356


ValFun  Grad

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3567   0.1309   0.6153  96.9288  38.5936  26.9699
***** Episode 93101, Mean R = -7.9  Std R = 3.9  Min R = -19.1
PolicyLoss: 1.75
Policy_Entropy: 0.239
Policy_KL: 0.00616
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 3.45e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00027


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0016   5.5684   2.2087   2.1342
ADVA:  (19103,) (35322,) 0.5408244153785177
ADV1:  0.0013263422765675192 0.0012479540600068089 0.005660524086494856 0.03489509896824311 -0.07800372738898204
ADVB:  (22616,) (35322,) 0.6402808447992753
ADV2:  0.3207032590613955 0.5240757327446122 0.6128586448110557 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3021   0.5393   2.4164  96.9288  38.5936  26.9699
***** Episode 93132, Mean R = -8.8  Std R = 4.5  Min R = -21.9
PolicyLoss: 2.38
Policy_Entropy: 0.239
Policy_KL: 0.00503
Policy_SD: 0.525
Steps: 1.18e+04
TotalSteps:

seeker_angles |    0.00    0.00 |    0.07    0.07 |   -1.00   -0.97 |    1.00    0.99
cs_angles |  0.0025  0.0036 |  0.0724  0.0741 | -0.9975 -0.9743 |  0.9986  0.9856
optical_flow | -0.0000 -0.0000 |  0.0201  0.0189 | -1.1146 -0.9962 |  1.0728  0.7980
v_err    | -0.0106 |  0.0596 | -0.4545 |  0.1035
landing_rewards |    9.74 |    1.59 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.01
tracking_rewards |  -14.45 |    3.91 |  -34.03 |   -7.91
steps    |     378 |      20 |     335 |     415
***** Episode 93380, Mean R = -9.0  Std R = 5.1  Min R = -27.9
PolicyLoss: 1.84
Policy_Entropy: 0.239
Policy_KL: 0.00736
Policy_SD: 0.535
Steps: 1.17e+04
TotalSteps: 3.46e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00139


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0008   5.5684   2.2087   2.1342
ADVA:  (19774,) (35116,) 0.563105137259369
ADV1:  0.00044730911982870936 0.00016049477052657105 0.006118525378582487 0.078232

attitude |    0.01    0.03   -0.12 |    1.30    0.68    1.92 |   -3.14   -1.57   -3.14 |    3.14    1.53    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.03   -0.04 |    0.68    1.91 |   -1.54   -3.14 |    1.48    3.10
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.02 |    0.02    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.52
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.98   -0.91 |    0.97    0.99
cs_angles |  0.0015  0.0016 |  0.0729  0.0758 | -0.9849 -0.9111 |  0.9730  0.9913
optical_flow | -0.0001  0.0001 |  0.0210  0.0178 | -1.1259 -0.8633 |  1.0372  1.0170
v_err    | -0.0107 |  0.0598 | -0.4522 |  0.0975
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3986   0.7235   2.8348  96.9288  38.5936  26.9699
Update Cnt = 3030    ET =   1536.8   Stats:  Mean, Std, Min, Max
r_f      |  -15.21  -14.17  -18.08 |  184.33  162.13  207.07 | -395.00 -387.40 -391.19 |  383.50  379.15  389.86
v_f      |    0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.11   -0.09   -0.10 |    0.09    0.09    0.09
r_i      |  -18.86  -44.30  -55.64 |  695.02  622.33  782.84 |-1333.04-1357.11-1355.26 | 1329.12 1257.96 1295.99
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.09    0.10
norm_rf  |    0.14 |    0.06 |    0.01 |    0.35
norm_vf  |    0.08 |    0.01 |    0.03 |    0.11
gs_f     |    1.46 |    2.49 |    0.01 |   21.02
thrust   |    0.01    0.00    0.00 |    0.65    0.66    0.66 |   -3.41   -3.31   -3.42 |    3.45    3.42    3.34
norm_thrust |    0.88 |    0.72 |    0.00 |    3.46
fuel     |    1.45 |    0.16 |    1.14 |    2.07
rewards  |   -8.68 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5372   0.1984   0.9007  96.9288  38.5936  26.9699
***** Episode 94248, Mean R = -8.0  Std R = 4.9  Min R = -24.5
PolicyLoss: 1.82
Policy_Entropy: 0.241
Policy_KL: 0.00897
Policy_SD: 0.528
Steps: 1.17e+04
TotalSteps: 3.49e+07
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.00141


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0019   5.5684   2.2087   2.1342
ADVA:  (20797,) (35346,) 0.5883834097210434
ADV1:  0.0011336657426943763 0.000866600653145647 0.007273963278756609 0.12733709717561142 -0.057172963919727815
ADVB:  (21857,) (35346,) 0.6183726588581452
ADV2:  0.22188559797374824 0.4394443103545064 0.5498979676135057 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  10.9253   7.1210  25.1168  96.9288  38.5936  26.9699
***** Episode 94279, Mean R = -9.3  Std R = 5.6  Min R = -23.2
PolicyLoss: 2.05
Policy_Entropy: 0.241
Policy_KL: 0.00897
Policy_SD: 0.524
Steps: 1.19e+04
TotalSteps:

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6825   0.2763   1.2520  96.9288  38.5936  26.9699
***** Episode 94496, Mean R = -10.0  Std R = 4.5  Min R = -22.3
PolicyLoss: 1.48
Policy_Entropy: 0.239
Policy_KL: 0.00989
Policy_SD: 0.528
Steps: 1.17e+04
TotalSteps: 3.5e+07
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 0.00039


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0006   5.5684   2.2087   2.1342
ADVA:  (20566,) (34866,) 0.5898583146905294
ADV1:  0.0002580952914036927 -0.0005306921155921331 0.007535195006323205 0.04061251929623849 -0.055329038491351575
ADVB:  (19585,) (34866,) 0.5617220214535651
ADV2:  0.10183295419255792 0.30748764986354005 0.4291265536900851 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6581   0.2895   1.2965  96.9288  38.5936  26.9699
***** Episode 94527, Mean R = -10.7  Std R = 6.3  Min R = -31.4
PolicyLoss: 1.58
Policy_Entropy: 0.24
Policy_KL: 0.00896
Policy_SD: 0.526
Steps: 1.15e+04
TotalSt

ADVA:  (22098,) (34871,) 0.633707091852829
ADV1:  0.00037830966064425833 -0.0005890557057946667 0.007507638635724746 0.05441794566408836 -0.06457045573241615
ADVB:  (18953,) (34871,) 0.5435175360614838
ADV2:  0.06598518057331483 0.252647826081023 0.3540078495742466 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5810   0.2934   1.1311  96.9288  38.5936  26.9699
***** Episode 94744, Mean R = -9.8  Std R = 6.5  Min R = -28.4
PolicyLoss: 1.34
Policy_Entropy: 0.24
Policy_KL: 0.00738
Policy_SD: 0.529
Steps: 1.16e+04
TotalSteps: 3.51e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.000925


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0013   0.0051   5.5684   2.2087   2.1342
ADVA:  (20545,) (34838,) 0.5897296056030771
ADV1:  0.00106255120319982 0.00018490516802725549 0.0073812594248801955 0.05441794566408836 -0.06267729507639418
ADVB:  (21238,) (34838,) 0.6096216774786153
ADV2:  0.18011097956912228 0.3374242920548747 0.42517308751370086 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0009   5.5684   2.2087   2.1342
ADVA:  (18019,) (34877,) 0.5166442067838404
ADV1:  -3.1070578071754756e-05 -0.0014712823716735582 0.007353729509022536 0.03872896536126308 -0.05696360691293312
ADVB:  (17543,) (34877,) 0.5029962439429997
ADV2:  0.004569232730691249 0.2597101914814288 0.4067984695926316 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0671   0.3620   1.8819  96.9288  38.5936  26.9699
***** Episode 94992, Mean R = -9.2  Std R = 4.7  Min R = -21.8
PolicyLoss: 1.49
Policy_Entropy: 0.239
Policy_KL: 0.0101
Policy_SD: 0.525
Steps: 1.17e+04
TotalSteps: 3.52e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.000295


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0031   5.5684   2.2087   2.1342
ADVA:  (20479,) (34704,) 0.5901048870447211
ADV1:  0.00032400552225389945 -0.0007646481174441107 0.00835550030961704 0.04669348336257334 -0.05696360691293312
AD

cs_angles |  0.0003 -0.0008 |  0.0740  0.0736 | -0.9922 -0.9981 |  0.9869  0.9907
optical_flow |  0.0000  0.0001 |  0.0198  0.0187 | -0.9464 -1.0718 |  1.0227  0.9260
v_err    | -0.0110 |  0.0605 | -0.4561 |  0.1039
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.08 |    0.04
tracking_rewards |  -14.60 |    3.87 |  -27.11 |   -8.40
steps    |     376 |      20 |     334 |     416
***** Episode 95240, Mean R = -9.1  Std R = 5.1  Min R = -24.6
PolicyLoss: 2.09
Policy_Entropy: 0.241
Policy_KL: 0.00673
Policy_SD: 0.523
Steps: 1.17e+04
TotalSteps: 3.53e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.000287


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   5.5684   2.2087   2.1342
ADVA:  (19097,) (35264,) 0.5415437840290381
ADV1:  0.0008361271558335118 0.00047690097407820166 0.006981056264301628 0.045568822656225194 -0.0690310096418954
ADVB:  (22164,) (35264,) 0.628516333938294
ADV2:  0.

thrust   |    0.00    0.00    0.01 |    0.64    0.66    0.66 |   -3.32   -3.42   -3.21 |    3.34    3.36    3.34
norm_thrust |    0.88 |    0.71 |    0.00 |    3.46
fuel     |    1.45 |    0.18 |    1.03 |    2.18
rewards  |   -9.48 |    5.20 |  -34.86 |   -1.61
fuel_rewards |   -4.15 |    0.51 |   -6.26 |   -2.93
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    3.07 |   13.32 |    0.01 |  276.80
norm_af  |    1.75 |    0.92 |    0.03 |    3.26
norm_wf  |    0.01 |    0.01 |    0.00 |    0.04
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |   -0.09   -0.07   -0.00 |    1.24    0.69    1.84 |   -3.14   -1.56   -3.14 |    3.14    1.57    3.14
w        |   -0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.07   -0.00 |    0.69

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6440   0.2165   1.0721  96.9288  38.5936  26.9699
***** Episode 95829, Mean R = -8.6  Std R = 4.7  Min R = -22.3
PolicyLoss: 1.99
Policy_Entropy: 0.241
Policy_KL: 0.0103
Policy_SD: 0.53
Steps: 1.18e+04
TotalSteps: 3.55e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000261


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   5.5684   2.2087   2.1342
ADVA:  (19451,) (35297,) 0.5510666628891974
ADV1:  0.00047168543338919205 0.00016467999165758728 0.005552977548844922 0.04836243885664032 -0.09527008181784241
ADVB:  (20440,) (35297,) 0.5790860413066267
ADV2:  0.15180610126674948 0.36710491398127465 0.49981031689570593 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7023   0.3320   1.3828  96.9288  38.5936  26.9699
Update Cnt = 3090    ET =   1586.5   Stats:  Mean, Std, Min, Max
r_f      |  -10.66   -2.44   -2.76 |  196.81  175.38  194.45 | -392.54 -390.98 -377.65 |  386.07

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6114   0.1918   1.1191  96.9288  38.5936  26.9699
***** Episode 96077, Mean R = -9.8  Std R = 6.2  Min R = -29.8
PolicyLoss: 1.45
Policy_Entropy: 0.241
Policy_KL: 0.00897
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 3.56e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.00102


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0017   0.0065   5.5684   2.2087   2.1342
ADVA:  (20023,) (35102,) 0.5704233377015555
ADV1:  0.0014965954287700159 0.0005361282393178075 0.007680077130191498 0.04597845677853962 -0.06794364636502215
ADVB:  (23256,) (35102,) 0.6625263517748277
ADV2:  0.2835297910196314 0.41169581130405575 0.46141530919684715 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4761   0.5819   2.7541  96.9288  38.5936  26.9699
***** Episode 96108, Mean R = -9.1  Std R = 4.1  Min R = -18.0
PolicyLoss: 1.8
Policy_Entropy: 0.241
Policy_KL: 0.00657
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps

ADVA:  (20925,) (34883,) 0.5998623971562079
ADV1:  0.0004317198706464357 -0.0001289506121898047 0.007216230735175523 0.04140055269027473 -0.06177528642345781
ADVB:  (19282,) (34883,) 0.5527620904165352
ADV2:  0.07923225583892907 0.29544804616459963 0.42245065276126964 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.4770   1.7442   5.8003  96.9288  38.5936  26.9699
***** Episode 96325, Mean R = -9.2  Std R = 4.4  Min R = -19.1
PolicyLoss: 1.55
Policy_Entropy: 0.241
Policy_KL: 0.0221
Policy_SD: 0.535
Steps: 1.16e+04
TotalSteps: 3.57e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.00024


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0001   0.0006   5.5684   2.2087   2.1342
ADVA:  (18636,) (34847,) 0.5347949608287658
ADV1:  0.0003314974964347076 6.299755005967548e-05 0.005771229355657483 0.043026520302381355 -0.06177528642345781
ADVB:  (21046,) (34847,) 0.6039544293626424
ADV2:  0.19596994803721998 0.39641365635428805 0.51267029768692

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0011   0.0038   5.5684   2.2087   2.1342
ADVA:  (19455,) (35119,) 0.5539736325066204
ADV1:  0.0007038536110613282 0.00041163340585133974 0.005703831038647386 0.03487720218399293 -0.07764859829061782
ADVB:  (20911,) (35119,) 0.5954326717731143
ADV2:  0.17166710867192825 0.3799418565664539 0.5093492462255388 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3819   0.6243   2.4937  96.9288  38.5936  26.9699
***** Episode 96573, Mean R = -8.4  Std R = 3.4  Min R = -17.0
PolicyLoss: 1.84
Policy_Entropy: 0.24
Policy_KL: 0.00992
Policy_SD: 0.533
Steps: 1.15e+04
TotalSteps: 3.58e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 9.1e-05


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   5.5684   2.2087   2.1342
ADVA:  (18903,) (35127,) 0.5381330600392861
ADV1:  0.00046731596659380266 0.00029482049018321563 0.005678280392946124 0.03920127644555743 -0.07764859829061782
ADVB: 

Dynamics: Max Disturbance (m/s^2):  [0.00140921 0.00141043 0.00142314] 0.0024495944750556626
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0028   5.5684   2.2087   2.1342
ADVA:  (20536,) (35117,) 0.5847879944186576
ADV1:  0.0008967240875932311 0.0005309325412655488 0.007068027643408325 0.0502352442093158 -0.07653760220487377
ADVB:  (21282,) (35117,) 0.6060312669077654
ADV2:  0.21608565739585206 0.4210485675940504 0.5296710527166726 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2464   0.4942   2.5453  96.9288  38.5936  26.9699
***** Episode 96821, Mean R = -8.5  Std R = 4.1  Min R = -17.5
PolicyLoss: 2.01
Policy_Entropy: 0.241
Policy_KL: 0.00618
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 3.59e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.000269


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0010   5.5684   2.2087   2.1342
ADVA:  (21733,) (35117,) 0.6188740496056041
ADV1:  0.0 -0.0004082550677

w        |   -0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.01    0.10 |    0.67    1.97 |   -1.50   -3.13 |    1.40    3.14
w_f      |   -0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.04   -0.03   -0.01 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.52
seeker_angles |   -0.00    0.00 |    0.07    0.08 |   -0.99   -1.00 |    1.00    0.99
cs_angles | -0.0007  0.0015 |  0.0732  0.0772 | -0.9927 -0.9966 |  0.9991  0.9943
optical_flow |  0.0000  0.0001 |  0.0199  0.0192 | -1.0025 -0.8780 |  1.2155  1.0562
v_err    | -0.0110 |  0.0605 | -0.4556 |  0.1091
landing_rewards |    9.84 |    1.26 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.07 |    0.01
tracking_rewards |  -14.61 |    4.29 |  -29.01 |   -7.38
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.9617   1.4966   7.1362  96.9288  38.5936  26.9699
Update Cnt = 3140    ET =   1382.1   Stats:  Mean, Std, Min, Max
r_f      |    5.25  -15.26   -7.41 |  184.76  186.09  197.94 | -390.31 -383.79 -388.95 |  389.62  395.32  380.12
v_f      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.08   -0.11   -0.09 |    0.10    0.10    0.11
r_i      |   29.28  -43.15  -10.58 |  696.69  683.11  742.53 |-1325.73-1334.24-1300.69 | 1253.89 1276.25 1365.92
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.10    0.09
norm_rf  |    0.14 |    0.06 |    0.02 |    0.33
norm_vf  |    0.07 |    0.01 |    0.03 |    0.11
gs_f     |    1.10 |    1.38 |    0.00 |    9.75
thrust   |   -0.00    0.00    0.00 |    0.67    0.67    0.66 |   -3.34   -3.34   -3.35 |    3.44    3.37    3.21
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.49 |    0.18 |    1.04 |    2.08
rewards  |   -9.28 

ADVA:  (19032,) (34945,) 0.5446272714265274
ADV1:  0.00010422195826015335 0.00020309550579012802 0.006091926863614548 0.10586684393594792 -0.06490597753248506
ADVB:  (19199,) (34945,) 0.5494062097581914
ADV2:  0.093990296783823 0.38774085332022756 0.5482975315576811 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7181   0.2499   1.1914  96.9288  38.5936  26.9699
***** Episode 97658, Mean R = -8.7  Std R = 4.7  Min R = -20.9
PolicyLoss: 2.03
Policy_Entropy: 0.241
Policy_KL: 0.00635
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 3.62e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.000472


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0018   5.5684   2.2087   2.1342
ADVA:  (18663,) (35123,) 0.5313612162970134
ADV1:  2.157953346995558e-05 0.00011245231718714389 0.005637686413084518 0.03784701440709698 -0.06490597753248506
ADVB:  (18926,) (35123,) 0.5388491871423284
ADV2:  0.08075426396036406 0.3925868375509235 0.5653156033701703

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0018   0.0069   5.5684   2.2087   2.1342
ADVA:  (20823,) (34983,) 0.5952319698139096
ADV1:  0.0012648191021664337 0.0005466882332122001 0.00895859570167122 0.04340809330865619 -0.06456762561639325
ADVB:  (21921,) (34983,) 0.6266186433410513
ADV2:  0.20798697169856717 0.3986737205583546 0.5094613854253538 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4912   0.6638   2.9974  96.9288  38.5936  26.9699
***** Episode 97906, Mean R = -10.6  Std R = 5.7  Min R = -27.0
PolicyLoss: 1.84
Policy_Entropy: 0.24
Policy_KL: 0.00666
Policy_SD: 0.542
Steps: 1.16e+04
TotalSteps: 3.63e+07
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.964
VF_0_Loss : 0.00123


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0031   5.5684   2.2087   2.1342
ADVA:  (18913,) (34875,) 0.5423082437275986
ADV1:  0.0006410452417015215 0.0003241015544785338 0.008665178013855293 0.07760320707278667 -0.06456762561639325
ADVB:  (2

***** Episode 98123, Mean R = -9.2  Std R = 3.1  Min R = -15.6
PolicyLoss: 1.65
Policy_Entropy: 0.241
Policy_KL: 0.0062
Policy_SD: 0.534
Steps: 1.19e+04
TotalSteps: 3.64e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.000322


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0006   0.0022   5.5684   2.2087   2.1342
ADVA:  (19258,) (35558,) 0.5415940154114405
ADV1:  6.054749711755913e-05 -0.00018061939013064432 0.006360725277947087 0.04262385218681669 -0.10138680991902976
ADVB:  (19449,) (35558,) 0.546965521120423
ADV2:  0.09352039114969143 0.34602697224395856 0.4924308285171074 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6510   0.2384   1.1681  96.9288  38.5936  26.9699
***** Episode 98154, Mean R = -8.8  Std R = 4.6  Min R = -20.2
PolicyLoss: 1.81
Policy_Entropy: 0.241
Policy_KL: 0.00632
Policy_SD: 0.533
Steps: 1.2e+04
TotalSteps: 3.64e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.000338


ValFun  Grad

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1640   0.4994   2.1039  96.9288  38.5936  26.9699
***** Episode 98371, Mean R = -9.0  Std R = 5.7  Min R = -27.4
PolicyLoss: 1.97
Policy_Entropy: 0.24
Policy_KL: 0.00621
Policy_SD: 0.53
Steps: 1.18e+04
TotalSteps: 3.65e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.000562


Dynamics: Max Disturbance (m/s^2):  [0.00140921 0.00141043 0.00142314] 0.0024495944750556626
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0044   5.5684   2.2087   2.1342
ADVA:  (19073,) (35252,) 0.541047316464314
ADV1:  0.00015225617848218208 2.7403762596676496e-06 0.006043054589315191 0.04000862823222828 -0.08509806093127431
ADVB:  (19456,) (35252,) 0.5519119482582548
ADV2:  0.11159455584428045 0.3776942381875021 0.5192668676872493 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8548   0.3212   1.4378  96.9288  38.5936  26.9699
***** Episode 98402, Mean R = -9.8  Std R = 5.1  Min R = -24.2
PolicyLo

attitude |   -0.03    0.03    0.11 |    1.23    0.65    1.88 |   -3.14   -1.56   -3.14 |    3.14    1.56    3.14
w        |   -0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.03    0.10 |    0.66    1.88 |   -1.52   -3.13 |    1.41    3.11
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.02 |    0.03    0.04    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.61
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -0.97   -0.97 |    0.99    0.98
cs_angles |  0.0011 -0.0006 |  0.0757  0.0763 | -0.9658 -0.9685 |  0.9901  0.9791
optical_flow | -0.0000 -0.0000 |  0.0187  0.0171 | -1.0217 -0.8741 |  0.9931  0.8923
v_err    | -0.0113 |  0.0606 | -0.4572 |  0.1085
landing_rewards |    9.81 |    1.38 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6876   0.2840   1.3265  96.9288  38.5936  26.9699
Update Cnt = 3190    ET =   1321.8   Stats:  Mean, Std, Min, Max
r_f      |   -2.13    3.31  -11.03 |  192.75  171.90  202.86 | -388.63 -392.44 -392.09 |  386.24  377.31  386.14
v_f      |   -0.00   -0.00    0.00 |    0.04    0.04    0.04 |   -0.08   -0.09   -0.09 |    0.09    0.08    0.09
r_i      |    2.07    4.99  -45.86 |  715.78  648.71  754.22 |-1329.83-1230.97-1371.36 | 1295.00 1287.77 1334.31
v_i      |    0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.13 |    0.06 |    0.01 |    0.33
norm_vf  |    0.07 |    0.01 |    0.04 |    0.11
gs_f     |    1.27 |    1.99 |    0.01 |   23.64
thrust   |   -0.00    0.00    0.00 |    0.65    0.65    0.66 |   -3.00   -3.31   -3.41 |    3.44    3.42    3.43
norm_thrust |    0.88 |    0.71 |    0.00 |    3.46
fuel     |    1.46 |    0.19 |    1.02 |    2.19
rewards  |   -9.16 

ADVA:  (22208,) (35117,) 0.6324002619813766
ADV1:  0.0006530527868969429 -0.0005568572484599379 0.008527198391264704 0.057270420200070116 -0.09167749128208114
ADVB:  (20041,) (35117,) 0.5706922573112737
ADV2:  0.11434752256837086 0.2940235754944553 0.40345528482397713 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5934   0.2003   0.9795  96.9288  38.5936  26.9699
***** Episode 99208, Mean R = -9.5  Std R = 4.7  Min R = -22.3
PolicyLoss: 1.48
Policy_Entropy: 0.242
Policy_KL: 0.0076
Policy_SD: 0.522
Steps: 1.17e+04
TotalSteps: 3.68e+07
VF_0_ExplainedVarNew: 0.966
VF_0_ExplainedVarOld: 0.965
VF_0_Loss : 0.000389


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0001   0.0006   5.5684   2.2087   2.1342
ADVA:  (19827,) (35287,) 0.5618783121262788
ADV1:  0.0012354368508163578 0.0005430829794205334 0.007263821391136298 0.057270420200070116 -0.04879477926264647
ADVB:  (23544,) (35287,) 0.6672145549352453
ADV2:  0.28821803980055605 0.4233464944877153 0.49410005157755

Dynamics: Max Disturbance (m/s^2):  [0.00140921 0.00141043 0.00142314] 0.0024495944750556626
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0022   0.0077   5.5684   2.2087   2.1342
ADVA:  (20932,) (35232,) 0.5941189827429609
ADV1:  0.00022519435983139185 -0.0003728398544741577 0.006860588359211025 0.04935064076656037 -0.07578960068928614
ADVB:  (19135,) (35232,) 0.5431142143505904
ADV2:  0.0732140492001234 0.3080757451670588 0.44557465373050925 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6250   0.2878   1.2725  96.9288  38.5936  26.9699
***** Episode 99456, Mean R = -10.0  Std R = 5.8  Min R = -25.3
PolicyLoss: 1.62
Policy_Entropy: 0.242
Policy_KL: 0.00629
Policy_SD: 0.528
Steps: 1.18e+04
TotalSteps: 3.69e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000277


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0013   0.0046   5.5684   2.2087   2.1342
ADVA:  (20575,) (35236,) 0.5839198546940629
ADV1:  0.00055830971439

***** Episode 99673, Mean R = -10.2  Std R = 4.6  Min R = -19.8
PolicyLoss: 2.04
Policy_Entropy: 0.242
Policy_KL: 0.0104
Policy_SD: 0.534
Steps: 1.16e+04
TotalSteps: 3.7e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00776


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0014   0.0051   5.5684   2.2087   2.1342
ADVA:  (20585,) (34882,) 0.590132446534029
ADV1:  0.0 -0.0003493994876780303 0.006371918415916801 0.0438193556170936 -0.06620472736202476
ADVB:  (17260,) (34882,) 0.49481107734648244
ADV2:  0.0 0.2753630563302451 0.4359360284476969 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5207   0.1767   0.9327  96.9288  38.5936  26.9699
***** Episode 99704, Mean R = -10.5  Std R = 5.1  Min R = -24.4
PolicyLoss: 1.59
Policy_Entropy: 0.242
Policy_KL: 0.0117
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 3.7e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00685


Dynamics: Max Disturbance (m/s^2):  [0.00140921 0.

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1842   0.4471   2.0114  96.9288  38.5936  26.9699
***** Episode 99921, Mean R = -8.4  Std R = 5.2  Min R = -27.5
PolicyLoss: 2.18
Policy_Entropy: 0.243
Policy_KL: 0.00932
Policy_SD: 0.529
Steps: 1.15e+04
TotalSteps: 3.71e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.000755


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0029   5.5684   2.2087   2.1342
ADVA:  (20643,) (34835,) 0.5925936558059423
ADV1:  0.00037661613416983906 0.00018597848709504114 0.006551922312626296 0.04712947661039402 -0.05237266679364666
ADVB:  (19423,) (34835,) 0.5575714080665997
ADV2:  0.10826077413701597 0.36847333373832486 0.5191759851802792 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6383   0.2193   1.0296  96.9288  38.5936  26.9699
***** Episode 99952, Mean R = -8.9  Std R = 4.2  Min R = -17.1
PolicyLoss: 1.89
Policy_Entropy: 0.242
Policy_KL: 0.0287
Policy_SD: 0.528
Steps: 1.17e+04
TotalSte

seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.98   -0.98 |    1.00    0.96
cs_angles |  0.0026  0.0027 |  0.0719  0.0730 | -0.9839 -0.9779 |  0.9971  0.9633
optical_flow | -0.0001  0.0000 |  0.0195  0.0191 | -0.9514 -1.1200 |  1.0764  1.0481
v_err    | -0.0108 |  0.0596 | -0.4544 |  0.1060
landing_rewards |    9.65 |    1.85 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -14.27 |    3.69 |  -32.18 |   -7.82
steps    |     376 |      20 |     334 |     418
***** Episode 100200, Mean R = -8.7  Std R = 4.4  Min R = -17.9
PolicyLoss: 1.5
Policy_Entropy: 0.242
Policy_KL: 0.0085
Policy_SD: 0.525
Steps: 1.16e+04
TotalSteps: 3.72e+07
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.959
VF_0_Loss : 0.00126


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0011   0.0043   5.5684   2.2087   2.1342
ADVA:  (20296,) (34939,) 0.580898136752626
ADV1:  0.0006777933411662796 4.461574413281311e-05 0.007640552101737728 0.060635996

attitude |    0.02   -0.03    0.03 |    1.12    0.65    1.80 |   -3.14   -1.54   -3.14 |    3.14    1.48    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.03   -0.01 |    0.66    1.79 |   -1.41   -3.14 |    1.43    3.07
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.04    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.57
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.98   -0.93 |    0.98    1.00
cs_angles |  0.0009  0.0030 |  0.0727  0.0778 | -0.9841 -0.9297 |  0.9792  0.9994
optical_flow | -0.0000  0.0002 |  0.0211  0.0187 | -1.0397 -0.9743 |  1.1284  1.0555
v_err    | -0.0110 |  0.0602 | -0.4522 |  0.0993
landing_rewards |    9.84 |    1.26 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1844   0.5775   2.6885  96.9288  38.5936  26.9699
Update Cnt = 3250    ET =   1454.7   Stats:  Mean, Std, Min, Max
r_f      |  -12.79    5.14   -8.26 |  191.27  179.24  196.69 | -395.47 -384.79 -395.83 |  375.44  390.77  386.10
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.10    0.09    0.10
r_i      |  -44.96   40.48  -14.76 |  697.14  672.19  744.81 |-1266.23-1258.31-1316.16 | 1266.36 1327.13 1360.40
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.04 |   -0.08   -0.10   -0.10 |    0.10    0.10    0.10
norm_rf  |    0.13 |    0.05 |    0.01 |    0.30
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.14 |    1.54 |    0.00 |   13.65
thrust   |   -0.00   -0.01   -0.00 |    0.66    0.66    0.66 |   -3.36   -3.37   -3.33 |    3.42    3.41    3.27
norm_thrust |    0.90 |    0.71 |    0.00 |    3.46
fuel     |    1.47 |    0.16 |    1.11 |    1.96
rewards  |   -9.63 

ADVA:  (18958,) (35072,) 0.5405451642335767
ADV1:  0.0009292804738837727 0.00045657793781493525 0.006523221365441715 0.032424566591317094 -0.05007168689704436
ADVB:  (22402,) (35072,) 0.6387431569343066
ADV2:  0.24899132052178452 0.4125208598615526 0.4882614727013734 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0201   0.4294   2.0134  96.9288  38.5936  26.9699
***** Episode 101068, Mean R = -8.4  Std R = 4.5  Min R = -20.6
PolicyLoss: 1.87
Policy_Entropy: 0.241
Policy_KL: 0.0095
Policy_SD: 0.538
Steps: 1.16e+04
TotalSteps: 3.75e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000375


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0023   0.0081   5.5684   2.2087   2.1342
ADVA:  (19499,) (35089,) 0.5557012169055829
ADV1:  0.0003449125928955358 -0.0002572106971652305 0.007282845920872772 0.032424566591317094 -0.0551617475910869
ADVB:  (20565,) (35089,) 0.5860811080395566
ADV2:  0.14570116995083693 0.34310824961385394 0.4424376586645

Dynamics: Max Disturbance (m/s^2):  [0.00140921 0.00141043 0.00142314] 0.0024495944750556626
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0014   5.5684   2.2087   2.1342
ADVA:  (20951,) (35045,) 0.5978313596804109
ADV1:  0.0 -0.0006454551297845754 0.0073135120837578334 0.06182925562308683 -0.07314822376038893
ADVB:  (18581,) (35045,) 0.5302040233984877
ADV2:  0.04917037778423749 0.2703386398111611 0.4124344383740077 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8949   0.3723   1.8151  96.9288  38.5936  26.9699
***** Episode 101316, Mean R = -8.5  Std R = 4.4  Min R = -20.6
PolicyLoss: 1.46
Policy_Entropy: 0.242
Policy_KL: 0.00817
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 3.76e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00032


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0007   5.5684   2.2087   2.1342
ADVA:  (20982,) (34971,) 0.5999828429269967
ADV1:  0.0003987108240884295 -0.00032789086

***** Episode 101533, Mean R = -8.4  Std R = 4.4  Min R = -21.8
PolicyLoss: 1.75
Policy_Entropy: 0.243
Policy_KL: 0.00707
Policy_SD: 0.53
Steps: 1.17e+04
TotalSteps: 3.77e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.000187


Dynamics: Max Disturbance (m/s^2):  [0.00140921 0.00141043 0.00142314] 0.0024495944750556626
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0003   0.0010   5.5684   2.2087   2.1342
ADVA:  (20614,) (35257,) 0.5846782199279575
ADV1:  0.0 -0.0011034452084828067 0.007144758687720616 0.044481972051568064 -0.07062759521128815
ADVB:  (16996,) (35257,) 0.48206030008225315
ADV2:  0.0 0.24462202033894848 0.3932156735746998 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3575   0.1161   0.6504  96.9288  38.5936  26.9699
***** Episode 101564, Mean R = -8.4  Std R = 4.0  Min R = -18.7
PolicyLoss: 1.45
Policy_Entropy: 0.242
Policy_KL: 0.00687
Policy_SD: 0.526
Steps: 1.18e+04
TotalSteps: 3.77e+07
VF_0_ExplainedVarNew: 0.976

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4537   0.4822   2.3183  96.9288  38.5936  26.9699
***** Episode 101781, Mean R = -10.6  Std R = 5.1  Min R = -25.1
PolicyLoss: 1.75
Policy_Entropy: 0.243
Policy_KL: 0.00904
Policy_SD: 0.53
Steps: 1.17e+04
TotalSteps: 3.78e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.00029


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0020   0.0074   5.5684   2.2087   2.1342
ADVA:  (20637,) (35123,) 0.5875637046949292
ADV1:  0.0013116589436076055 0.0007604505919895241 0.006973427355213065 0.03505743857685273 -0.07071533032176355
ADVB:  (22246,) (35123,) 0.6333741422999174
ADV2:  0.23255490399832518 0.4103086200692088 0.4964470620245069 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0026   0.3415   1.7379  96.9288  38.5936  26.9699
***** Episode 101812, Mean R = -8.7  Std R = 4.3  Min R = -17.7
PolicyLoss: 1.86
Policy_Entropy: 0.243
Policy_KL: 0.00749
Policy_SD: 0.528
Steps: 1.17e+04
TotalSte

cs_angles |  0.0035  0.0031 |  0.0727  0.0759 | -0.9983 -0.9688 |  0.9866  0.9937
optical_flow | -0.0001 -0.0000 |  0.0182  0.0162 | -1.2085 -0.9678 |  1.1214  1.1398
v_err    | -0.0110 |  0.0603 | -0.4621 |  0.0942
landing_rewards |    9.77 |    1.49 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.01
tracking_rewards |  -14.80 |    4.25 |  -31.22 |   -7.21
steps    |     379 |      20 |     338 |     420
***** Episode 102060, Mean R = -7.6  Std R = 3.2  Min R = -15.1
PolicyLoss: 2.22
Policy_Entropy: 0.243
Policy_KL: 0.00669
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 3.79e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.000134


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0006   0.0022   5.5684   2.2087   2.1342
ADVA:  (19225,) (35249,) 0.5454055434196715
ADV1:  0.00032410915350194677 0.0001892702910738564 0.00522598590117458 0.120321397009162 -0.05750716832229724
ADVB:  (20027,) (35249,) 0.568157961927998
ADV2:  0.13

attitude |   -0.02    0.01   -0.10 |    1.12    0.71    1.79 |   -3.14   -1.57   -3.14 |    3.14    1.52    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.01   -0.12 |    0.70    1.81 |   -1.52   -3.12 |    1.46    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.01 |    0.02    0.02    0.04
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.38
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -1.00   -1.00 |    0.99    0.96
cs_angles |  0.0010  0.0022 |  0.0727  0.0767 | -0.9985 -0.9967 |  0.9893  0.9638
optical_flow | -0.0001  0.0000 |  0.0189  0.0192 | -0.9804 -1.1096 |  1.1040  1.2816
v_err    | -0.0110 |  0.0601 | -0.4525 |  0.1005
landing_rewards |    9.52 |    2.15 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5628   0.2544   1.1801  96.9288  38.5936  26.9699
Update Cnt = 3310    ET =   1406.8   Stats:  Mean, Std, Min, Max
r_f      |   -9.73    8.59   -8.91 |  191.73  157.20  201.97 | -391.34 -370.31 -372.20 |  393.39  376.34  396.29
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.11   -0.09   -0.10 |    0.10    0.09    0.09
r_i      |  -38.20    3.24  -26.92 |  687.62  641.53  767.96 |-1312.74-1314.55-1319.57 | 1306.29 1285.26 1255.37
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.13 |    0.06 |    0.01 |    0.39
norm_vf  |    0.08 |    0.01 |    0.03 |    0.11
gs_f     |    1.32 |    1.97 |    0.00 |   18.16
thrust   |    0.00    0.00    0.00 |    0.66    0.67    0.65 |   -3.23   -3.34   -3.43 |    3.34    3.27    3.38
norm_thrust |    0.89 |    0.71 |    0.00 |    3.46
fuel     |    1.45 |    0.18 |    1.07 |    2.04
rewards  |   -9.40 

ADVA:  (19777,) (34965,) 0.5656227656227656
ADV1:  0.0009130570257125867 0.0001153127628082683 0.007193681892047909 0.03991451933681267 -0.05614478072100751
ADVB:  (20989,) (34965,) 0.6002860002860003
ADV2:  0.1738236677356369 0.34208716250821203 0.4352459188928089 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6029   0.2376   1.0806  96.9288  38.5936  26.9699
***** Episode 102928, Mean R = -10.4  Std R = 6.4  Min R = -24.6
PolicyLoss: 1.64
Policy_Entropy: 0.243
Policy_KL: 0.0105
Policy_SD: 0.531
Steps: 1.16e+04
TotalSteps: 3.82e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000202


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0020   5.5684   2.2087   2.1342
ADVA:  (18509,) (34908,) 0.5302222986134983
ADV1:  0.0006381598718073252 0.00029934301737399925 0.006456955178613515 0.0629249702433628 -0.05478350581063446
ADVB:  (21243,) (34908,) 0.6085424544517016
ADV2:  0.19240499874416842 0.38336446814524877 0.495241713231129

Dynamics: Max Disturbance (m/s^2):  [0.00140921 0.00141043 0.00142314] 0.0024495944750556626
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0023   5.5684   2.2087   2.1342
ADVA:  (21820,) (35354,) 0.6171861741245687
ADV1:  0.0 -0.0006476318089253483 0.007330122292981984 0.056459070380038834 -0.056467820363791776
ADVB:  (17599,) (35354,) 0.49779374328223114
ADV2:  0.0 0.28784640110033766 0.4305521781291349 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5194   0.1675   0.8948  96.9288  38.5936  26.9699
***** Episode 103176, Mean R = -10.4  Std R = 6.0  Min R = -26.1
PolicyLoss: 1.66
Policy_Entropy: 0.242
Policy_KL: 0.0115
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 3.83e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.000644


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0031   5.5684   2.2087   2.1342
ADVA:  (20272,) (35095,) 0.5776321413306739
ADV1:  0.0014248308266958647 0.0007559300985919902 0.0

***** Episode 103393, Mean R = -9.7  Std R = 4.9  Min R = -19.2
PolicyLoss: 2.14
Policy_Entropy: 0.234
Policy_KL: 1.39
Policy_SD: 0.531
Steps: 1.18e+04
TotalSteps: 3.84e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00118


Dynamics: Max Disturbance (m/s^2):  [0.00140921 0.00141043 0.00142314] 0.0024495944750556626
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0014   5.5684   2.2087   2.1342
ADVA:  (19194,) (35176,) 0.5456561291789856
ADV1:  0.0 -5.852406095559105e-05 0.00611343590197724 0.06043513538457945 -0.073524477246036
ADVB:  (18554,) (35176,) 0.5274619058448943
ADV2:  0.058533214026216585 0.3611695562726812 0.533643384644613 3.0 0.0
 *** BROKE ***   0 0.8128612637519836
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  71.3204   0.0000  71.3204 108.2148  71.3204  33.5059
***** Episode 103424, Mean R = -10.2  Std R = 8.8  Min R = -51.7
PolicyLoss: 2.12
Policy_Entropy: 0.236
Policy_KL: 0.813
Policy_SD: 0.531
Steps: 1.16e+04
Tota

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7713   0.2899   1.3479 108.2148  71.3204  33.5059
***** Episode 103641, Mean R = -8.6  Std R = 4.2  Min R = -18.9
PolicyLoss: 1.91
Policy_Entropy: 0.242
Policy_KL: 0.00652
Policy_SD: 0.53
Steps: 1.15e+04
TotalSteps: 3.85e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.000523


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0026   5.5684   2.2087   2.1342
ADVA:  (20041,) (35290,) 0.5678945877018986
ADV1:  0.00026360928261835986 -0.00016973154070715337 0.00630281928506022 0.04169256019612205 -0.07011553421240896
ADVB:  (19549,) (35290,) 0.5539529611788042
ADV2:  0.09162878804808446 0.31659161697831345 0.4549444783762995 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4621   0.1498   0.7700 108.2148  71.3204  33.5059
***** Episode 103672, Mean R = -9.4  Std R = 4.8  Min R = -24.4
PolicyLoss: 1.64
Policy_Entropy: 0.242
Policy_KL: 0.00636
Policy_SD: 0.526
Steps: 1.19e+04
Total

attitude |    0.04   -0.04   -0.04 |    1.14    0.66    1.82 |   -3.14   -1.55   -3.14 |    3.14    1.54    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.05   -0.12 |    0.66    1.82 |   -1.44   -3.13 |    1.37    3.10
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.02 |    0.03    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.52
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.99   -1.00 |    0.98    0.91
cs_angles |  0.0036  0.0029 |  0.0738  0.0749 | -0.9902 -1.0000 |  0.9820  0.9125
optical_flow |  0.0000 -0.0000 |  0.0184  0.0182 | -1.0496 -0.9885 |  0.9939  1.0784
v_err    | -0.0113 |  0.0605 | -0.4539 |  0.1019
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (19574,) (35314,) 0.554284419776859
ADV1:  0.00031557717391861306 9.380117919847884e-05 0.005755752112677477 0.043541640928028646 -0.06230552297358244
ADVB:  (20255,) (35314,) 0.573568556379906
ADV2:  0.14672765987048364 0.37561074842484615 0.5187228727859599 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5024   0.1858   0.7884 108.2148  71.3204  33.5059
Update Cnt = 3360    ET =   2074.3   Stats:  Mean, Std, Min, Max
r_f      |   -9.97  -10.24  -10.58 |  189.40  172.13  199.94 | -376.47 -392.11 -382.69 |  393.95  390.34  389.55
v_f      |    0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.09   -0.11   -0.09 |    0.10    0.09    0.10
r_i      |  -52.77    5.50  -10.42 |  692.81  657.36  758.07 |-1266.83-1309.86-1314.49 | 1286.19 1313.01 1290.12
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.13 |    0.06 |    0.02 |    0.33
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f   

Dynamics: Max Disturbance (m/s^2):  [0.00140921 0.00141043 0.00142314] 0.0024495944750556626
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0012   5.5684   2.2087   2.1342
ADVA:  (19300,) (35113,) 0.5496539743115085
ADV1:  0.0003244482655487841 0.00025208205274137197 0.005663585219830151 0.04011706609361693 -0.06630403533106188
ADVB:  (19854,) (35113,) 0.5654316065274969
ADV2:  0.11404614385524012 0.36308419238236045 0.5107610841914157 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0022   0.4607   1.9144 108.2148  71.3204  33.5059
***** Episode 104478, Mean R = -8.0  Std R = 4.3  Min R = -23.7
PolicyLoss: 1.84
Policy_Entropy: 0.243
Policy_KL: 0.00773
Policy_SD: 0.522
Steps: 1.17e+04
TotalSteps: 3.88e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000536


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0040   0.0024   0.0081   5.5684   2.2087   2.1342
ADVA:  (20705,) (35145,) 0.5891307440603215
ADV1:  0.0 -0.000642956

***** Episode 104695, Mean R = -7.5  Std R = 4.1  Min R = -18.5
PolicyLoss: 1.5
Policy_Entropy: 0.243
Policy_KL: 0.00601
Policy_SD: 0.51
Steps: 1.19e+04
TotalSteps: 3.89e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00753


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0021   5.5684   2.2087   2.1342
ADVA:  (21277,) (35351,) 0.6018783061299539
ADV1:  0.0008358160740325939 0.00017554589901361644 0.006909544505965425 0.04454775092957869 -0.07405561438132074
ADVB:  (21461,) (35351,) 0.6070832508274164
ADV2:  0.17084673675021236 0.34747477744182886 0.43560762134065223 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7894   0.2832   1.5531 108.2148  71.3204  33.5059
***** Episode 104726, Mean R = -8.7  Std R = 4.4  Min R = -21.4
PolicyLoss: 1.64
Policy_Entropy: 0.243
Policy_KL: 0.00572
Policy_SD: 0.513
Steps: 1.17e+04
TotalSteps: 3.89e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.00116


Dynamics: Ma

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3481   0.5263   2.4771 108.2148  71.3204  33.5059
***** Episode 104943, Mean R = -12.0  Std R = 11.9  Min R = -71.6
PolicyLoss: 1.64
Policy_Entropy: 0.243
Policy_KL: 0.0138
Policy_SD: 0.527
Steps: 1.17e+04
TotalSteps: 3.9e+07
VF_0_ExplainedVarNew: 0.972
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.000586


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0045   0.0026   0.0094   5.5684   2.2087   2.1342
ADVA:  (20503,) (35507,) 0.5774354352662855
ADV1:  0.0012467282858468834 0.0006328055417192844 0.00920821764802365 0.12279714623198665 -0.11064388172893858
ADVB:  (22490,) (35507,) 0.6333962317289549
ADV2:  0.25917297689308516 0.4345779748314955 0.517512581373199 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6462   0.6042   3.3058 108.2148  71.3204  33.5059
***** Episode 104974, Mean R = -8.7  Std R = 3.5  Min R = -18.1
PolicyLoss: 1.97
Policy_Entropy: 0.243
Policy_KL: 0.0108
Policy_SD: 0.524
Steps: 1.18e+04
TotalSteps

ADVA:  (20592,) (35408,) 0.5815634884771803
ADV1:  0.0007825918336093109 0.0001742822202036405 0.006493246623232499 0.042222935516953386 -0.05183448940222949
ADVB:  (21452,) (35408,) 0.6058517849073656
ADV2:  0.18463889786594512 0.3586712532629232 0.45487648519273216 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3474   0.1241   0.6869 108.2148  71.3204  33.5059
***** Episode 105191, Mean R = -8.8  Std R = 4.7  Min R = -24.3
PolicyLoss: 1.7
Policy_Entropy: 0.243
Policy_KL: 0.00722
Policy_SD: 0.521
Steps: 1.18e+04
TotalSteps: 3.91e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.00282


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0029   5.5684   2.2087   2.1342
ADVA:  (21610,) (35364,) 0.611073407985522
ADV1:  0.0005346993090148558 -0.00017732180727864137 0.006758199312845767 0.042222935516953386 -0.09208338076156042
ADVB:  (19642,) (35364,) 0.5554235946159937
ADV2:  0.09043237481250199 0.29473491034755217 0.4122906370226

attitude |    0.07    0.01   -0.17 |    1.09    0.63    1.78 |   -3.14   -1.54   -3.14 |    3.14    1.56    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02   -0.14 |    0.62    1.78 |   -1.51   -3.09 |    1.39    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.01 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.36
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.93   -0.94 |    0.98    1.00
cs_angles |  0.0007  0.0010 |  0.0726  0.0741 | -0.9257 -0.9392 |  0.9775  0.9984
optical_flow |  0.0001  0.0001 |  0.0180  0.0168 | -0.9557 -0.9531 |  1.0743  1.1422
v_err    | -0.0112 |  0.0601 | -0.4530 |  0.1026
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8323   0.3256   1.6986 108.2148  71.3204  33.5059
Update Cnt = 3410    ET =   1677.5   Stats:  Mean, Std, Min, Max
r_f      |   -6.07   -5.54   13.60 |  182.21  178.18  194.06 | -387.58 -379.78 -387.76 |  391.17  388.52  395.17
v_f      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.09    0.11    0.09
r_i      |   -2.10  -34.80   42.23 |  694.92  665.14  738.45 |-1330.95-1282.34-1360.10 | 1252.86 1308.87 1343.23
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.13 |    0.05 |    0.02 |    0.27
norm_vf  |    0.08 |    0.01 |    0.03 |    0.13
gs_f     |    1.29 |    2.09 |    0.00 |   24.73
thrust   |   -0.00    0.00    0.00 |    0.65    0.67    0.66 |   -3.45   -3.43   -3.41 |    3.35    3.44    3.46
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.44 |    0.19 |    1.04 |    2.11
rewards  |   -9.66 

ADVA:  (20771,) (35412,) 0.5865525810459731
ADV1:  0.0003511267899155377 -0.00012063593918526748 0.006910940985656927 0.045172028960740906 -0.0644371608585253
ADVB:  (19801,) (35412,) 0.5591607364735118
ADV2:  0.09287240427412365 0.30825797089634943 0.43910140247231777 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4368   0.1817   0.8431 108.2148  71.3204  33.5059
***** Episode 106028, Mean R = -9.2  Std R = 5.4  Min R = -24.5
PolicyLoss: 1.57
Policy_Entropy: 0.243
Policy_KL: 0.00752
Policy_SD: 0.526
Steps: 1.18e+04
TotalSteps: 3.94e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.000277


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   5.5684   2.2087   2.1342
ADVA:  (22456,) (35469,) 0.6331162423524768
ADV1:  0.0005740825626661429 -0.0002984264200179071 0.007346514531296007 0.045172028960740906 -0.07557535145788558
ADVB:  (20142,) (35469,) 0.5678761735600102
ADV2:  0.10200019127160809 0.2828478395412232 0.3893756207

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0011   0.0039   5.5684   2.2087   2.1342
ADVA:  (19549,) (35003,) 0.5584949861440448
ADV1:  0.0007915067831212432 0.0002390632917374417 0.0075608203515548185 0.04801316361324853 -0.07838745339935727
ADVB:  (21671,) (35003,) 0.6191183612833185
ADV2:  0.21190631643382846 0.3946247995137966 0.48168817773400324 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6332   0.2339   1.1533 108.2148  71.3204  33.5059
***** Episode 106276, Mean R = -7.9  Std R = 5.0  Min R = -23.7
PolicyLoss: 1.83
Policy_Entropy: 0.244
Policy_KL: 0.00655
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 3.95e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.000884


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0011   0.0042   5.5684   2.2087   2.1342
ADVA:  (21750,) (34977,) 0.6218372073076593
ADV1:  0.00033255527634986834 -0.0005987978410320853 0.008414327545097198 0.07441080358599017 -0.07838745339935727
A

***** Episode 106493, Mean R = -9.2  Std R = 5.0  Min R = -23.1
PolicyLoss: 1.77
Policy_Entropy: 0.243
Policy_KL: 0.00824
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 3.96e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.00064


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0034   5.5684   2.2087   2.1342
ADVA:  (22287,) (35614,) 0.62579322738249
ADV1:  0.0007845295515687229 -3.244490620898105e-05 0.007733014725014413 0.04362720498451339 -0.06148929937362185
ADVB:  (21135,) (35614,) 0.5934463974841354
ADV2:  0.14342992766242868 0.32593267203176757 0.42372032179809876 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5034   0.1753   0.8719 108.2148  71.3204  33.5059
***** Episode 106524, Mean R = -10.7  Std R = 6.0  Min R = -29.1
PolicyLoss: 1.57
Policy_Entropy: 0.243
Policy_KL: 0.00715
Policy_SD: 0.525
Steps: 1.2e+04
TotalSteps: 3.96e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.000446


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6846   0.2662   1.4431 108.2148  71.3204  33.5059
***** Episode 106741, Mean R = -7.7  Std R = 4.7  Min R = -25.2
PolicyLoss: 1.69
Policy_Entropy: 0.244
Policy_KL: 0.00671
Policy_SD: 0.517
Steps: 1.19e+04
TotalSteps: 3.97e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.000335


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0048   5.5684   2.2087   2.1342
ADVA:  (21862,) (35432,) 0.617012869722285
ADV1:  0.00036186550467075425 -0.0008079400165166567 0.008143781819752916 0.04152957910443958 -0.06796175014152112
ADVB:  (19731,) (35432,) 0.5568694965003387
ADV2:  0.08136238897504601 0.25395597449595786 0.35376411535646435 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5667   0.2347   0.9678 108.2148  71.3204  33.5059
***** Episode 106772, Mean R = -10.1  Std R = 5.0  Min R = -23.2
PolicyLoss: 1.3
Policy_Entropy: 0.244
Policy_KL: 0.00734
Policy_SD: 0.518
Steps: 1.17e+04
Tota

attitude |    0.02    0.05    0.19 |    1.18    0.66    1.87 |   -3.14   -1.56   -3.14 |    3.14    1.54    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.05    0.18 |    0.67    1.88 |   -1.47   -3.12 |    1.39    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.02 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.67
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.97   -0.99 |    0.96    0.86
cs_angles |  0.0009  0.0033 |  0.0750  0.0762 | -0.9687 -0.9930 |  0.9631  0.8639
optical_flow | -0.0000 -0.0001 |  0.0168  0.0164 | -0.9715 -1.0173 |  0.7853  0.8031
v_err    | -0.0109 |  0.0594 | -0.4530 |  0.0992
landing_rewards |    9.55 |    2.08 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (20289,) (35479,) 0.5718594097917078
ADV1:  5.687431143724074e-05 -0.00033047853480268336 0.006970195123604729 0.04020433250987454 -0.08618879765264692
ADVB:  (19848,) (35479,) 0.5594295216888864
ADV2:  0.11208481792812222 0.3375452380573702 0.4662118182135552 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8054   0.3222   1.4301 108.2148  71.3204  33.5059
Update Cnt = 3460    ET =   1788.5   Stats:  Mean, Std, Min, Max
r_f      |    8.20    3.80   -6.15 |  175.56  169.97  204.40 | -396.35 -369.61 -394.91 |  368.24  394.05  383.18
v_f      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.13   -0.09   -0.10 |    0.10    0.11    0.10
r_i      |   30.21   14.02    0.46 |  661.19  648.12  783.51 |-1242.46-1324.69-1271.48 | 1332.74 1265.11 1297.51
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.09    0.10
norm_rf  |    0.13 |    0.05 |    0.02 |    0.30
norm_vf  |    0.08 |    0.01 |    0.05 |    0.13
gs_f  

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0016   0.0059   5.5684   2.2087   2.1342
ADVA:  (20230,) (35011,) 0.5778183999314501
ADV1:  0.002101687432582086 0.0010245825707222884 0.008035756526453627 0.05197628480291472 -0.06031244260026023
ADVB:  (24927,) (35011,) 0.7119762360401016
ADV2:  0.37055461610372037 0.46611839865433025 0.47783114407076627 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6389   0.2252   1.0610 108.2148  71.3204  33.5059
***** Episode 107578, Mean R = -8.7  Std R = 4.3  Min R = -19.5
PolicyLoss: 1.88
Policy_Entropy: 0.245
Policy_KL: 0.00685
Policy_SD: 0.522
Steps: 1.17e+04
TotalSteps: 4e+07
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.000217


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0009   0.0032   5.5684   2.2087   2.1342
ADVA:  (19572,) (34822,) 0.562058468784102
ADV1:  0.001125895053422022 0.0003860958145838374 0.007546672398341946 0.05197628480291472 -0.06031244260026023
ADVB:  (22

***** Episode 107795, Mean R = -8.7  Std R = 5.5  Min R = -24.9
PolicyLoss: 2.01
Policy_Entropy: 0.245
Policy_KL: 0.00775
Policy_SD: 0.525
Steps: 1.17e+04
TotalSteps: 4.01e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.000275


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0016   0.0060   5.5684   2.2087   2.1342
ADVA:  (20848,) (34987,) 0.5958784691456827
ADV1:  0.0006646875029574818 -0.00016288011183147197 0.008024042339591126 0.03473593044974593 -0.12150975870413139
ADVB:  (20423,) (34987,) 0.5837311001229027
ADV2:  0.13469222959375515 0.32447284853100167 0.4311353995885291 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7775   0.2774   1.3781 108.2148  71.3204  33.5059
***** Episode 107826, Mean R = -10.7  Std R = 6.3  Min R = -26.2
PolicyLoss: 1.58
Policy_Entropy: 0.245
Policy_KL: 0.0077
Policy_SD: 0.529
Steps: 1.16e+04
TotalSteps: 4.01e+07
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000373


ValFun 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5242   0.2168   0.9903 108.2148  71.3204  33.5059
***** Episode 108043, Mean R = -8.2  Std R = 4.4  Min R = -22.8
PolicyLoss: 1.58
Policy_Entropy: 0.245
Policy_KL: 0.00715
Policy_SD: 0.527
Steps: 1.18e+04
TotalSteps: 4.02e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.000199


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0024   5.5684   2.2087   2.1342
ADVA:  (19945,) (35258,) 0.5656872199217199
ADV1:  0.0004242467560320734 0.00021840499231072467 0.006038946131085983 0.08708223529156794 -0.09713936032243631
ADVB:  (19797,) (35258,) 0.5614895910148051
ADV2:  0.11336007831330308 0.34879804397721026 0.4885141187411357 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0134   0.3538   1.9882 108.2148  71.3204  33.5059
***** Episode 108074, Mean R = -9.8  Std R = 5.0  Min R = -22.2
PolicyLoss: 1.76
Policy_Entropy: 0.245
Policy_KL: 0.00878
Policy_SD: 0.534
Steps: 1.17e+04
TotalS

ADVA:  (18890,) (35067,) 0.5386830923660422
ADV1:  0.0010912698011822097 0.0005279659843961592 0.006730992420452973 0.043969222463689395 -0.09483893064378002
ADVB:  (22578,) (35067,) 0.6438531953118316
ADV2:  0.2686102524079296 0.42459912451836995 0.4921581806283922 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.2469   2.0638   8.6222 108.2148  71.3204  33.5059
***** Episode 108291, Mean R = -11.6  Std R = 4.7  Min R = -21.8
PolicyLoss: 1.88
Policy_Entropy: 0.245
Policy_KL: 0.0104
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 4.03e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00046


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0020   5.5684   2.2087   2.1342
ADVA:  (21144,) (35044,) 0.6033557813035042
ADV1:  0.00011270628058291689 -0.0003110616932170755 0.006852459709470451 0.05584852921944883 -0.0571560471516675
ADVB:  (18142,) (35044,) 0.5176920442871819
ADV2:  0.027740157065490316 0.2793765716753961 0.42493707862123

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.01   -0.07 |    0.65    1.93 |   -1.54   -3.13 |    1.51    3.11
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.03    0.03    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.53
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -0.99   -0.86 |    0.99    0.99
cs_angles |  0.0018  0.0038 |  0.0743  0.0766 | -0.9913 -0.8592 |  0.9856  0.9890
optical_flow |  0.0001  0.0001 |  0.0175  0.0173 | -1.0982 -0.8251 |  1.1255  1.0928
v_err    | -0.0107 |  0.0595 | -0.4520 |  0.1035
landing_rewards |    9.74 |    1.59 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -14.82 |    4.34 |  -42.61 |   -8.13
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5126   0.2036   0.9693 108.2148  71.3204  33.5059
Update Cnt = 3510    ET =   1520.6   Stats:  Mean, Std, Min, Max
r_f      |   -5.19  -20.09  -13.85 |  179.84  168.63  197.61 | -374.10 -369.95 -388.66 |  387.15  389.66  396.60
v_f      |    0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.09    0.10    0.09
r_i      |  -18.49  -65.37  -62.19 |  689.58  646.80  747.46 |-1293.34-1296.22-1293.25 | 1278.72 1313.35 1282.21
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.10    0.10
norm_rf  |    0.13 |    0.05 |    0.01 |    0.35
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f     |    1.23 |    1.91 |    0.01 |   20.16
thrust   |   -0.00    0.00   -0.00 |    0.65    0.65    0.65 |   -3.34   -3.46   -3.34 |    3.32    3.17    3.46
norm_thrust |    0.88 |    0.71 |    0.00 |    3.46
fuel     |    1.41 |    0.16 |    0.97 |    2.16
rewards  |   -8.93 

ADVA:  (21092,) (34991,) 0.6027835729187505
ADV1:  0.0010094155394964263 0.00018186779922909783 0.008714472977161958 0.03481588981861322 -0.06151509261114703
ADVB:  (21300,) (34991,) 0.6087279586179303
ADV2:  0.20063926043659114 0.3819654092265219 0.4603186572692555 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8361   0.2809   1.6015 108.2148  71.3204  33.5059
***** Episode 109128, Mean R = -10.3  Std R = 4.8  Min R = -25.3
PolicyLoss: 1.79
Policy_Entropy: 0.245
Policy_KL: 0.0065
Policy_SD: 0.532
Steps: 1.17e+04
TotalSteps: 4.06e+07
VF_0_ExplainedVarNew: 0.969
VF_0_ExplainedVarOld: 0.966
VF_0_Loss : 0.00145


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0011   0.0043   5.5684   2.2087   2.1342
ADVA:  (21663,) (34865,) 0.621339452172666
ADV1:  0.0009510293483086501 -3.758786077279026e-05 0.008972019191704554 0.04946024059059684 -0.07314469115514238
ADVB:  (20971,) (34865,) 0.6014914670873369
ADV2:  0.16991331347348199 0.34246782854941055 0.430554090163001

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0018   5.5684   2.2087   2.1342
ADVA:  (20435,) (35106,) 0.5820942289067396
ADV1:  0.0008942886944130358 5.484453894426444e-05 0.008027651420172711 0.04617436546775222 -0.06673930949125312
ADVB:  (22525,) (35106,) 0.6416282117017034
ADV2:  0.21702613959664319 0.3735344990123793 0.45936746494155345 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7021   0.2215   1.1502 108.2148  71.3204  33.5059
***** Episode 109376, Mean R = -10.4  Std R = 6.2  Min R = -25.1
PolicyLoss: 1.66
Policy_Entropy: 0.244
Policy_KL: 0.00698
Policy_SD: 0.54
Steps: 1.16e+04
TotalSteps: 4.07e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.000469


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0005   0.0019   5.5684   2.2087   2.1342
ADVA:  (21278,) (35139,) 0.6055380062039329
ADV1:  0.000334219774721008 -0.0006899085116630424 0.008754792282484996 0.04617436546775222 -0.11078891738142249
ADVB

***** Episode 109593, Mean R = -10.0  Std R = 5.0  Min R = -21.3
PolicyLoss: 1.4
Policy_Entropy: 0.245
Policy_KL: 0.00747
Policy_SD: 0.525
Steps: 1.17e+04
TotalSteps: 4.07e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.962
VF_0_Loss : 0.000571


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0009   5.5684   2.2087   2.1342
ADVA:  (21827,) (35241,) 0.619363809199512
ADV1:  0.0008438453643129385 -0.0003635999663754307 0.008001187812036828 0.05753828452360521 -0.05463052627754039
ADVB:  (22096,) (35241,) 0.6269969637637979
ADV2:  0.19140173687590464 0.3220476656904763 0.3909119241270242 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5391   0.2378   1.1428 108.2148  71.3204  33.5059
***** Episode 109624, Mean R = -8.5  Std R = 5.1  Min R = -26.6
PolicyLoss: 1.46
Policy_Entropy: 0.246
Policy_KL: 0.00688
Policy_SD: 0.525
Steps: 1.18e+04
TotalSteps: 4.08e+07
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000376


ValFun  Gr

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3956   0.1354   0.7243 108.2148  71.3204  33.5059
***** Episode 109841, Mean R = -11.0  Std R = 5.7  Min R = -26.5
PolicyLoss: 1.58
Policy_Entropy: 0.245
Policy_KL: 0.00516
Policy_SD: 0.533
Steps: 1.17e+04
TotalSteps: 4.08e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.000377


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0012   5.5684   2.2087   2.1342
ADVA:  (19472,) (35086,) 0.554979193980505
ADV1:  0.0013539650829186584 0.0006810885291415055 0.006660497702017354 0.040732460790805225 -0.07200448416224331
ADVB:  (24153,) (35086,) 0.6883942313173346
ADV2:  0.33761786609841815 0.4542302314357846 0.49136480504264685 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7569   0.3029   1.5526 108.2148  71.3204  33.5059
***** Episode 109872, Mean R = -8.2  Std R = 3.8  Min R = -20.9
PolicyLoss: 1.88
Policy_Entropy: 0.245
Policy_KL: 0.00569
Policy_SD: 0.53
Steps: 1.17e+04
TotalS

w        |    0.00    0.00   -0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.01    0.05 |    0.66    1.87 |   -1.46   -3.14 |    1.39    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.03    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.50
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.90 |    0.98    0.96
cs_angles |  0.0016  0.0021 |  0.0764  0.0764 | -0.9873 -0.8995 |  0.9842  0.9626
optical_flow | -0.0001  0.0000 |  0.0157  0.0157 | -1.0150 -0.8325 |  1.0361  0.8715
v_err    | -0.0111 |  0.0604 | -0.4535 |  0.1098
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.03
tracking_rewards |  -15.30 |    4.69 |  -38.07 |   -8.51
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2822   0.0915   0.4404 108.2148  71.3204  33.5059
Update Cnt = 3560    ET =   1503.1   Stats:  Mean, Std, Min, Max
r_f      |    9.18   14.88    5.30 |  188.01  170.69  205.63 | -391.72 -385.61 -374.58 |  397.73  391.19  387.99
v_f      |   -0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.11 |    0.11    0.10    0.09
r_i      |   17.34   49.91   23.87 |  690.08  639.26  776.60 |-1310.66-1263.37-1291.87 | 1324.51 1266.52 1276.78
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.08    0.09
norm_rf  |    0.13 |    0.05 |    0.03 |    0.36
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f     |    1.34 |    2.18 |    0.01 |   23.25
thrust   |   -0.00   -0.00   -0.00 |    0.66    0.67    0.66 |   -3.46   -3.42   -3.46 |    3.43    3.29    3.41
norm_thrust |    0.90 |    0.71 |    0.00 |    3.46
fuel     |    1.45 |    0.19 |    1.06 |    2.12
rewards  |   -9.68 

ADVA:  (22474,) (35168,) 0.6390468607825296
ADV1:  0.0009434878203366734 -0.0007725551968957157 0.00904368899150641 0.0652484970399852 -0.0620702260936532
ADVB:  (21566,) (35168,) 0.6132279344858963
ADV2:  0.15685946191793135 0.2889625522974262 0.36843928234369105 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4162   0.1556   0.7609 108.2148  71.3204  33.5059
***** Episode 110678, Mean R = -11.0  Std R = 6.5  Min R = -27.4
PolicyLoss: 1.35
Policy_Entropy: 0.244
Policy_KL: 0.00592
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 4.12e+07
VF_0_ExplainedVarNew: 0.949
VF_0_ExplainedVarOld: 0.945
VF_0_Loss : 0.00218


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0048   5.5684   2.2087   2.1342
ADVA:  (21761,) (35229,) 0.6177013256124216
ADV1:  0.0008637965656773635 -0.0004757647737465417 0.008963779752378538 0.03477574732395389 -0.07902772569172786
ADVB:  (21163,) (35229,) 0.6007266740469499
ADV2:  0.1593839086386245 0.3185677642467449 0.40110425229043034

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0040   5.5684   2.2087   2.1342
ADVA:  (21808,) (34759,) 0.6274058517218563
ADV1:  0.002829912244714681 0.0010753913411450386 0.009719399516290859 0.06685105068494834 -0.06489458029755701
ADVB:  (24145,) (34759,) 0.694640237060905
ADV2:  0.36113603905053476 0.44338980845100456 0.4632785426370004 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3298   0.6323   2.8137 108.2148  71.3204  33.5059
***** Episode 110926, Mean R = -11.0  Std R = 6.5  Min R = -26.8
PolicyLoss: 1.83
Policy_Entropy: 0.245
Policy_KL: 0.0052
Policy_SD: 0.534
Steps: 1.15e+04
TotalSteps: 4.13e+07
VF_0_ExplainedVarNew: 0.97
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.000408


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0010   0.0035   5.5684   2.2087   2.1342
ADVA:  (21208,) (34824,) 0.6090052837123823
ADV1:  0.002868743630544778 0.001619496820382776 0.008491790679941704 0.06685105068494834 -0.07053760054921249
ADVB:  (2

***** Episode 111143, Mean R = -10.1  Std R = 5.5  Min R = -23.5
PolicyLoss: 1.55
Policy_Entropy: 0.245
Policy_KL: 0.00601
Policy_SD: 0.523
Steps: 1.18e+04
TotalSteps: 4.13e+07
VF_0_ExplainedVarNew: 0.946
VF_0_ExplainedVarOld: 0.943
VF_0_Loss : 0.000691


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0013   0.0050   5.5684   2.2087   2.1342
ADVA:  (21912,) (35182,) 0.6228184867261668
ADV1:  0.0006263516225965427 -0.0006144655668965418 0.009218545176849786 0.03758146590303241 -0.06292191126768401
ADVB:  (20962,) (35182,) 0.595816042294355
ADV2:  0.13406098279960368 0.30715745575638403 0.40424216768357896 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5170   0.7537   3.3992 108.2148  71.3204  33.5059
***** Episode 111174, Mean R = -10.0  Std R = 5.6  Min R = -26.5
PolicyLoss: 1.48
Policy_Entropy: 0.245
Policy_KL: 0.0088
Policy_SD: 0.525
Steps: 1.15e+04
TotalSteps: 4.13e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.967
VF_0_Loss : 0.000444


ValFun 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5633   0.1980   1.0346 108.2148  71.3204  33.5059
***** Episode 111391, Mean R = -9.4  Std R = 5.5  Min R = -23.1
PolicyLoss: 1.53
Policy_Entropy: 0.245
Policy_KL: 0.00621
Policy_SD: 0.524
Steps: 1.16e+04
TotalSteps: 4.14e+07
VF_0_ExplainedVarNew: 0.967
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.000231


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0032   5.5684   2.2087   2.1342
ADVA:  (21234,) (34815,) 0.609909521757863
ADV1:  0.0021721350503768767 0.0010442808111517789 0.007884328682367683 0.057122683056824386 -0.06710816877083566
ADVB:  (23696,) (34815,) 0.6806261668820911
ADV2:  0.341898839029071 0.4442713132475351 0.476339162806203 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3409   0.5900   2.7357 108.2148  71.3204  33.5059
***** Episode 111422, Mean R = -9.1  Std R = 6.1  Min R = -29.3
PolicyLoss: 1.86
Policy_Entropy: 0.245
Policy_KL: 0.00596
Policy_SD: 0.518
Steps: 1.16e+04
TotalSteps

w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.01   -0.07 |    0.65    1.87 |   -1.53   -3.12 |    1.44    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.02 |    0.03    0.02    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.24 |    0.00 |    1.55
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.91   -0.99 |    0.94    0.87
cs_angles |  0.0017  0.0027 |  0.0725  0.0727 | -0.9119 -0.9869 |  0.9447  0.8723
optical_flow |  0.0000 -0.0000 |  0.0172  0.0171 | -0.9548 -0.8740 |  1.0515  0.9383
v_err    | -0.0111 |  0.0602 | -0.4607 |  0.1009
landing_rewards |    9.23 |    2.67 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -14.50 |    4.19 |  -33.86 |   -7.45
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.5412   2.4688  10.4433 108.2148  71.3204  33.5059
Update Cnt = 3610    ET =   1408.0   Stats:  Mean, Std, Min, Max
r_f      |  -17.62  -20.88   -7.01 |  185.97  169.21  207.48 | -391.27 -378.93 -387.79 |  384.04  399.42  389.35
v_f      |    0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.11 |    0.10    0.10    0.09
r_i      |  -45.93  -45.46  -46.18 |  676.93  660.43  781.96 |-1369.18-1292.10-1351.00 | 1351.69 1297.49 1318.38
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.10    0.10
norm_rf  |    0.12 |    0.06 |    0.02 |    0.37
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f     |    1.42 |    2.21 |    0.01 |   17.39
thrust   |    0.00    0.00   -0.00 |    0.64    0.66    0.64 |   -3.41   -3.30   -3.42 |    3.36    3.45    3.39
norm_thrust |    0.87 |    0.70 |    0.00 |    3.46
fuel     |    1.41 |    0.17 |    1.02 |    2.01
rewards  |   -9.07 

ADVA:  (20524,) (35028,) 0.5859312549960032
ADV1:  0.0011671394116406367 -5.512519143544703e-05 0.009292377939972844 0.04073667209438564 -0.08513346440093278
ADVB:  (21809,) (35028,) 0.6226161927600776
ADV2:  0.23425330114507453 0.391817413976151 0.4643041353986436 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.1749   1.6615   7.4348 108.2148  71.3204  33.5059
***** Episode 112228, Mean R = -10.2  Std R = 6.3  Min R = -23.8
PolicyLoss: 1.81
Policy_Entropy: 0.245
Policy_KL: 0.0111
Policy_SD: 0.527
Steps: 1.16e+04
TotalSteps: 4.17e+07
VF_0_ExplainedVarNew: 0.964
VF_0_ExplainedVarOld: 0.959
VF_0_Loss : 0.000532


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0045   0.0027   0.0099   5.5684   2.2087   2.1342
ADVA:  (22901,) (34885,) 0.6564712627203669
ADV1:  0.0030817739339461386 0.00118541507473451 0.00949166094543327 0.03965908937725876 -0.08513346440093278
ADVB:  (23781,) (34885,) 0.6816970044431704
ADV2:  0.3425180724864925 0.4320826829276668 0.4506456687963231 3.0

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0009   5.5684   2.2087   2.1342
ADVA:  (18186,) (35154,) 0.5173237753882916
ADV1:  0.00010804793244602665 -0.00022536139122081617 0.006025548081862593 0.0416916312848733 -0.0576760400656105
ADVB:  (20534,) (35154,) 0.584115605620982
ADV2:  0.1527388961418208 0.3617158408881209 0.4807644421140013 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7625   0.3468   1.4455 108.2148  71.3204  33.5059
***** Episode 112476, Mean R = -9.2  Std R = 5.2  Min R = -26.7
PolicyLoss: 1.76
Policy_Entropy: 0.245
Policy_KL: 0.00904
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 4.18e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.000151


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0016   5.5684   2.2087   2.1342
ADVA:  (18672,) (34964,) 0.5340350074362201
ADV1:  0.0003746774774875585 0.000283031457810885 0.005610790414179168 0.0416916312848733 -0.0562988902053462
ADVB:  (2

***** Episode 112693, Mean R = -9.6  Std R = 4.9  Min R = -22.9
PolicyLoss: 1.54
Policy_Entropy: 0.246
Policy_KL: 0.013
Policy_SD: 0.52
Steps: 1.16e+04
TotalSteps: 4.19e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.000328


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0007   5.5684   2.2087   2.1342
ADVA:  (21243,) (35083,) 0.605506940683522
ADV1:  0.00020740153072732088 -0.0006883421996615709 0.008180421699059208 0.06975975281357777 -0.08970768531163528
ADVB:  (19852,) (35083,) 0.5658581079155146
ADV2:  0.1224129324766032 0.3148599276507576 0.4202405473573631 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6125   0.2239   1.0715 108.2148  71.3204  33.5059
***** Episode 112724, Mean R = -9.6  Std R = 5.3  Min R = -23.6
PolicyLoss: 1.58
Policy_Entropy: 0.246
Policy_KL: 0.0103
Policy_SD: 0.518
Steps: 1.18e+04
TotalSteps: 4.19e+07
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.97
VF_0_Loss : 0.000389


ValFun  Gradien

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.4842   0.9987   4.6745 108.2148  71.3204  33.5059
***** Episode 112941, Mean R = -10.7  Std R = 5.9  Min R = -27.0
PolicyLoss: 2.1
Policy_Entropy: 0.245
Policy_KL: 0.0106
Policy_SD: 0.527
Steps: 1.15e+04
TotalSteps: 4.2e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.000152


Dynamics: Max Disturbance (m/s^2):  [0.00140921 0.00141043 0.00142314] 0.0024495944750556626
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0005   0.0020   5.5684   2.2087   2.1342
ADVA:  (19662,) (34861,) 0.564011359398755
ADV1:  0.0006213924254607848 8.22492224340643e-05 0.007088407504732139 0.04777224663611351 -0.08217799286299515
ADVB:  (21154,) (34861,) 0.6068099021829552
ADV2:  0.18390775403093157 0.3592197824559643 0.45690604530097473 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2990   0.4689   2.2701 108.2148  71.3204  33.5059
***** Episode 112972, Mean R = -8.8  Std R = 4.4  Min R = -21.1
PolicyLo

w        |    0.00    0.00   -0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.02   -0.01 |    0.68    1.73 |   -1.40   -3.13 |    1.45    3.12
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.01 |    0.02    0.03    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.25 |    0.00 |    1.31
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.97   -1.00 |    0.98    0.90
cs_angles |  0.0009  0.0041 |  0.0774  0.0750 | -0.9695 -0.9979 |  0.9773  0.8987
optical_flow | -0.0000 -0.0000 |  0.0186  0.0173 | -1.0040 -1.0536 |  0.9173  1.0992
v_err    | -0.0108 |  0.0594 | -0.4535 |  0.1002
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -14.88 |    4.30 |  -37.49 |   -7.76
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2129   0.4958   2.3894 108.2148  71.3204  33.5059
Update Cnt = 3660    ET =   1375.7   Stats:  Mean, Std, Min, Max
r_f      |   -2.99  -16.18   20.01 |  192.85  170.43  202.21 | -385.61 -393.50 -386.08 |  388.18  371.13  397.03
v_f      |    0.00    0.00   -0.01 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.10    0.09
r_i      |   -7.63  -52.05  106.58 |  689.59  638.89  772.38 |-1346.47-1259.59-1292.83 | 1351.79 1260.27 1317.26
v_i      |    0.00    0.00   -0.01 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.12 |    0.05 |    0.01 |    0.33
norm_vf  |    0.08 |    0.01 |    0.04 |    0.11
gs_f     |    1.53 |    2.95 |    0.00 |   38.43
thrust   |   -0.00    0.00    0.00 |    0.65    0.66    0.65 |   -3.34   -3.28   -3.41 |    3.27    3.46    3.39
norm_thrust |    0.88 |    0.70 |    0.00 |    3.46
fuel     |    1.41 |    0.18 |    1.01 |    2.16
rewards  |   -9.31 

ADVA:  (20110,) (35190,) 0.5714691673770957
ADV1:  0.0009681204587087336 0.00029496648976022773 0.006925535307979709 0.04335723923798057 -0.05034498106712715
ADVB:  (22873,) (35190,) 0.6499857914180165
ADV2:  0.256327859163779 0.4094404425822804 0.49369014010301066 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6846   0.2340   1.1507 108.2148  71.3204  33.5059
***** Episode 113778, Mean R = -10.2  Std R = 5.7  Min R = -22.2
PolicyLoss: 1.79
Policy_Entropy: 0.246
Policy_KL: 0.00761
Policy_SD: 0.524
Steps: 1.16e+04
TotalSteps: 4.23e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00011


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0006   5.5684   2.2087   2.1342
ADVA:  (20720,) (35053,) 0.5911048982968647
ADV1:  0.0011551926698398184 0.00041878157445173966 0.007184013411108094 0.04335723923798057 -0.09359360759358476
ADVB:  (22990,) (35053,) 0.6558639774056428
ADV2:  0.24820949799077202 0.3939998789500939 0.474708274399695

Dynamics: Max Disturbance (m/s^2):  [0.00140921 0.00141043 0.00142314] 0.0024495944750556626
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0016   5.5684   2.2087   2.1342
ADVA:  (17874,) (34690,) 0.5152493513980975
ADV1:  0.00047751173108743095 0.00031780819530417495 0.005261645636505239 0.04089701862092926 -0.051279897110604857
ADVB:  (20868,) (34690,) 0.6015566445661574
ADV2:  0.23819302043977392 0.4548940158000654 0.5641272363178813 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9888   0.4121   1.8230 108.2148  71.3204  33.5059
***** Episode 114026, Mean R = -8.0  Std R = 4.5  Min R = -23.3
PolicyLoss: 2.14
Policy_Entropy: 0.247
Policy_KL: 0.00732
Policy_SD: 0.521
Steps: 1.17e+04
TotalSteps: 4.24e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.000109


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0006   0.0021   5.5684   2.2087   2.1342
ADVA:  (17553,) (34955,) 0.5021599198970105
ADV1:  -9.075869360183

***** Episode 114243, Mean R = -9.2  Std R = 5.1  Min R = -19.5
PolicyLoss: 1.63
Policy_Entropy: 0.247
Policy_KL: 0.00759
Policy_SD: 0.513
Steps: 1.16e+04
TotalSteps: 4.25e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.000226


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0029   5.5684   2.2087   2.1342
ADVA:  (21598,) (35016,) 0.616803746858579
ADV1:  0.0009328960016708396 0.0001383552432022068 0.00759547034079989 0.034996778768133285 -0.09741165316235462
ADVB:  (21188,) (35016,) 0.605094813799406
ADV2:  0.17324414516533454 0.34708241899465125 0.44066699560089345 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9731   0.3572   1.8679 108.2148  71.3204  33.5059
***** Episode 114274, Mean R = -8.5  Std R = 5.6  Min R = -28.3
PolicyLoss: 1.62
Policy_Entropy: 0.247
Policy_KL: 0.00831
Policy_SD: 0.517
Steps: 1.17e+04
TotalSteps: 4.25e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.000338


Dynamics: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7212   0.7225   3.3525 108.2148  71.3204  33.5059
***** Episode 114491, Mean R = -9.9  Std R = 6.0  Min R = -25.6
PolicyLoss: 1.39
Policy_Entropy: 0.246
Policy_KL: 0.0117
Policy_SD: 0.522
Steps: 1.15e+04
TotalSteps: 4.26e+07
VF_0_ExplainedVarNew: 0.965
VF_0_ExplainedVarOld: 0.963
VF_0_Loss : 0.000321


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0034   5.5684   2.2087   2.1342
ADVA:  (21189,) (35023,) 0.6050024269765583
ADV1:  0.0007134512064865452 -3.931270561630863e-05 0.007474213311072177 0.036748517066666264 -0.08042822559876983
ADVB:  (19987,) (35023,) 0.5706821231761985
ADV2:  0.11975571157535553 0.3101549971611661 0.41569216243318974 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3404   0.1002   0.6696 108.2148  71.3204  33.5059
***** Episode 114522, Mean R = -9.4  Std R = 4.2  Min R = -18.7
PolicyLoss: 1.54
Policy_Entropy: 0.246
Policy_KL: 0.00774
Policy_SD: 0.522
Steps: 1.18e+04
Total

attitude |   -0.13    0.01   -0.01 |    1.19    0.64    1.86 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |   -0.00    0.00   -0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.01   -0.09 |    0.63    1.87 |   -1.53   -3.14 |    1.48    3.12
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.02 |    0.02    0.03    0.03
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.20 |    0.25 |    0.00 |    1.36
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.90   -0.96 |    1.00    0.99
cs_angles |  0.0002  0.0032 |  0.0739  0.0740 | -0.9021 -0.9616 |  0.9963  0.9897
optical_flow | -0.0001 -0.0001 |  0.0166  0.0164 | -0.9072 -0.9653 |  1.1524  1.0667
v_err    | -0.0108 |  0.0591 | -0.4578 |  0.1119
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (20321,) (35255,) 0.5764005105658772
ADV1:  0.0007947340913579282 0.00041841611810747856 0.007053525847561731 0.09889975742466145 -0.08617354409282912
ADVB:  (21269,) (35255,) 0.603290313430719
ADV2:  0.17152864318857697 0.3722012194137517 0.4889639005784236 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1570   0.5482   2.1536 108.2148  71.3204  33.5059
Update Cnt = 3710    ET =   1325.4   Stats:  Mean, Std, Min, Max
r_f      |   -3.50   15.92    0.75 |  187.32  171.19  190.73 | -396.83 -385.21 -389.95 |  361.53  364.82  391.34
v_f      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.11    0.10
r_i      |   -4.82   40.52   39.99 |  695.63  676.78  722.89 |-1323.08-1338.72-1228.68 | 1347.92 1363.40 1316.95
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.12 |    0.05 |    0.01 |    0.32
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f    

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0005   5.5684   2.2087   2.1342
ADVA:  (19945,) (35328,) 0.5645663496376812
ADV1:  0.0010284512489928052 0.0003730772211046421 0.006572976602293181 0.03532996462452065 -0.06673226289006075
ADVB:  (22858,) (35328,) 0.6470221920289855
ADV2:  0.25030114438382134 0.3952699338268634 0.47152468037766926 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6550   0.2688   1.2088 108.2148  71.3204  33.5059
***** Episode 115328, Mean R = -9.6  Std R = 5.7  Min R = -22.3
PolicyLoss: 1.74
Policy_Entropy: 0.246
Policy_KL: 0.00674
Policy_SD: 0.519
Steps: 1.18e+04
TotalSteps: 4.29e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.000127


Dynamics: Max Disturbance (m/s^2):  [0.00140921 0.00141043 0.00142314] 0.0024495944750556626
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0019   5.5684   2.2087   2.1342
ADVA:  (20116,) (35226,) 0.5710554703911883
ADV1:  0.000601175879230

***** Episode 115545, Mean R = -10.0  Std R = 6.5  Min R = -29.8
PolicyLoss: 1.54
Policy_Entropy: 0.246
Policy_KL: 0.00898
Policy_SD: 0.525
Steps: 1.15e+04
TotalSteps: 4.3e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.000194


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0008   5.5684   2.2087   2.1342
ADVA:  (20905,) (35088,) 0.5957877336981304
ADV1:  0.000430853208973285 -0.0003591904415737785 0.007214078606691759 0.04122411749493016 -0.11201606435399869
ADVB:  (20016,) (35088,) 0.5704514363885089
ADV2:  0.11879940967957114 0.311561611261214 0.4275003323335612 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7663   0.2673   1.3566 108.2148  71.3204  33.5059
***** Episode 115576, Mean R = -9.1  Std R = 5.2  Min R = -21.0
PolicyLoss: 1.55
Policy_Entropy: 0.246
Policy_KL: 0.00732
Policy_SD: 0.528
Steps: 1.18e+04
TotalSteps: 4.3e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.000189


Dynamics: Ma

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6071   0.2531   1.0704 108.2148  71.3204  33.5059
***** Episode 115793, Mean R = -8.3  Std R = 3.9  Min R = -20.9
PolicyLoss: 1.64
Policy_Entropy: 0.247
Policy_KL: 0.00825
Policy_SD: 0.518
Steps: 1.17e+04
TotalSteps: 4.31e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.000648


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0022   5.5684   2.2087   2.1342
ADVA:  (19722,) (35237,) 0.559695774328121
ADV1:  0.0009688530583126112 0.0003132651916816345 0.00749159471953943 0.036677975018777065 -0.09649161829797853
ADVB:  (22479,) (35237,) 0.6379373953514771
ADV2:  0.20726967811054473 0.36882852709427777 0.458072784143535 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6778   0.2445   1.2118 108.2148  71.3204  33.5059
***** Episode 115824, Mean R = -12.4  Std R = 5.7  Min R = -29.6
PolicyLoss: 1.64
Policy_Entropy: 0.247
Policy_KL: 0.00676
Policy_SD: 0.529
Steps: 1.18e+04
TotalSt

ADVA:  (20551,) (35094,) 0.5855986778366672
ADV1:  0.0004918326083414859 0.00013076446713306434 0.006838009971875581 0.041865898677711955 -0.08287480049611884
ADVB:  (19870,) (35094,) 0.5661936513364108
ADV2:  0.1325614693234664 0.36397682776582146 0.48401556917640115 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2335   0.0789   0.4855 108.2148  71.3204  33.5059
***** Episode 116041, Mean R = -7.8  Std R = 4.2  Min R = -20.8
PolicyLoss: 1.82
Policy_Entropy: 0.246
Policy_KL: 0.0073
Policy_SD: 0.524
Steps: 1.17e+04
TotalSteps: 4.32e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.000545


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0046   5.5684   2.2087   2.1342
ADVA:  (21174,) (35048,) 0.6041428897511983
ADV1:  0.0 -0.000582215615305934 0.007052572160092243 0.0485812279714094 -0.08287480049611884
ADVB:  (17263,) (35048,) 0.4925530700753253
ADV2:  0.0 0.2684047995575007 0.41973417224531473 3.0 0.0
Policy  Gradients: u/sd

attitude |    0.09   -0.08   -0.03 |    1.27    0.66    1.91 |   -3.14   -1.57   -3.14 |    3.14    1.44    3.14
w        |    0.00    0.00   -0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.08   -0.06 |    0.65    1.90 |   -1.45   -3.14 |    1.29    3.14
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.01 |    0.02    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.56
seeker_angles |    0.00    0.00 |    0.07    0.08 |   -1.00   -0.95 |    0.93    0.98
cs_angles |  0.0020  0.0004 |  0.0727  0.0755 | -0.9971 -0.9515 |  0.9271  0.9761
optical_flow | -0.0001  0.0000 |  0.0166  0.0163 | -1.0405 -0.9778 |  1.0488  0.9688
v_err    | -0.0108 |  0.0594 | -0.4536 |  0.0980
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (20486,) (35247,) 0.5812125854682668
ADV1:  0.00019965767288319426 -0.00016702523522724058 0.0068253100619716026 0.05803383774255805 -0.07758335304266668
ADVB:  (19539,) (35247,) 0.5543450506426079
ADV2:  0.09057252350380596 0.30694734995407524 0.43641681427780904 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3778   0.1465   0.7600 108.2148  71.3204  33.5059
Update Cnt = 3760    ET =   1192.3   Stats:  Mean, Std, Min, Max
r_f      |   -7.07    2.82    1.23 |  188.15  177.70  201.64 | -385.13 -389.23 -391.83 |  387.06  374.70  392.98
v_f      |    0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.10    0.10    0.09
r_i      |  -32.08   28.41    7.10 |  711.73  658.23  754.31 |-1312.93-1330.94-1315.00 | 1324.28 1326.06 1389.45
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.09    0.09
norm_rf  |    0.12 |    0.05 |    0.01 |    0.29
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0020   5.5684   2.2087   2.1342
ADVA:  (22062,) (35327,) 0.6245081665581567
ADV1:  0.0010834377850635693 0.0004257218026513881 0.007067986528775931 0.038241140243231164 -0.09578686543608683
ADVB:  (21764,) (35327,) 0.6160726922750305
ADV2:  0.19483720571513413 0.3588741496063894 0.4459733086292225 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9890   0.4112   1.9154 108.2148  71.3204  33.5059
***** Episode 116878, Mean R = -8.3  Std R = 4.5  Min R = -22.9
PolicyLoss: 1.65
Policy_Entropy: 0.247
Policy_KL: 0.00493
Policy_SD: 0.52
Steps: 1.19e+04
TotalSteps: 4.35e+07
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.000338


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0014   0.0051   5.5684   2.2087   2.1342
ADVA:  (22987,) (35525,) 0.6470654468684025
ADV1:  0.00014182501101258314 -0.0005551330209960324 0.007215911819084815 0.04349854995967162 -0.0720567185114034
ADVB

***** Episode 117095, Mean R = -8.7  Std R = 4.0  Min R = -22.6
PolicyLoss: 1.67
Policy_Entropy: 0.247
Policy_KL: 0.00929
Policy_SD: 0.53
Steps: 1.16e+04
TotalSteps: 4.36e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.000529


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0009   0.0032   5.5684   2.2087   2.1342
ADVA:  (20067,) (35025,) 0.5729336188436831
ADV1:  0.00029860193240924086 -0.0002369937761028933 0.0067670036016780465 0.04652845374647091 -0.05444836485352321
ADVB:  (19805,) (35025,) 0.5654532476802284
ADV2:  0.11087159926140029 0.3138299851503237 0.4285524545446398 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5447   0.2240   1.0351 108.2148  71.3204  33.5059
***** Episode 117126, Mean R = -9.3  Std R = 5.1  Min R = -21.6
PolicyLoss: 1.57
Policy_Entropy: 0.246
Policy_KL: 0.00798
Policy_SD: 0.526
Steps: 1.18e+04
TotalSteps: 4.36e+07
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.00043


ValFun  G

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.7414   1.7187   8.3304 108.2148  71.3204  33.5059
***** Episode 117343, Mean R = -9.6  Std R = 5.5  Min R = -25.6
PolicyLoss: 1.43
Policy_Entropy: 0.246
Policy_KL: 0.0163
Policy_SD: 0.529
Steps: 1.17e+04
TotalSteps: 4.37e+07
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.959
VF_0_Loss : 0.0184


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0032   5.5684   2.2087   2.1342
ADVA:  (23180,) (35207,) 0.6583917970858068
ADV1:  0.0002554413930913671 -0.0010763663735764775 0.008875922934115148 0.043181903829507484 -0.0602749352280092
ADVB:  (18788,) (35207,) 0.5336438776379697
ADV2:  0.047139470607988586 0.24443258298476422 0.34490096117109414 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4183   0.6883   2.7552 108.2148  71.3204  33.5059
***** Episode 117374, Mean R = -10.8  Std R = 6.7  Min R = -23.6
PolicyLoss: 1.3
Policy_Entropy: 0.246
Policy_KL: 0.0145
Policy_SD: 0.53
Steps: 1.17e+04
TotalSte

ADVA:  (20825,) (35159,) 0.5923092238118263
ADV1:  0.0013357700304576156 0.0007457032496633477 0.007028186814568659 0.05704377275201661 -0.12181851720464909
ADVB:  (23482,) (35159,) 0.667880201370915
ADV2:  0.2769111365431336 0.4254799931129425 0.499028716790888 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4894   0.2210   1.0756 108.2148  71.3204  33.5059
***** Episode 117591, Mean R = -9.8  Std R = 5.6  Min R = -26.7
PolicyLoss: 1.81
Policy_Entropy: 0.247
Policy_KL: 0.00556
Policy_SD: 0.533
Steps: 1.19e+04
TotalSteps: 4.38e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.000707


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0027   5.5684   2.2087   2.1342
ADVA:  (22176,) (35228,) 0.629499261950721
ADV1:  0.00046514689578799114 -0.0002166801745520551 0.007292186219577774 0.03753494158013315 -0.12181851720464909
ADVB:  (19715,) (35228,) 0.5596400590439423
ADV2:  0.09261162228340114 0.298944266589385 0.41268422145062855 3

att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |    0.13   -0.04   -0.08 |    1.17    0.63    1.83 |   -3.14   -1.57   -3.14 |    3.14    1.54    3.14
w        |    0.00    0.00   -0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |   -0.05   -0.09 |    0.63    1.83 |   -1.53   -3.11 |    1.36    3.11
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.01 |    0.04    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.67
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.97   -0.98 |    0.96    0.97
cs_angles |  0.0012  0.0039 |  0.0756  0.0758 | -0.9702 -0.9765 |  0.9627  0.9748
optical_flow | -0.0001 -0.0000 |  0.0157  0.0161 | -1.1137 -1.0290 |  0.8725  0.9435
v_err    | -0.0110 |  

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0050   0.0029   0.0108   5.5684   2.2087   2.1342
ADVA:  (21562,) (35329,) 0.6103201336012907
ADV1:  0.0024228356261397693 0.0013409752317098424 0.008646256752412497 0.0579252978475539 -0.08254771569449865
ADVB:  (23784,) (35329,) 0.6732146395312633
ADV2:  0.2849984566436527 0.42138622984329277 0.4705338240866594 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   9.9096   4.9184  19.0305 108.2148  71.3204  33.5059
Update Cnt = 3810    ET =   1523.0   Stats:  Mean, Std, Min, Max
r_f      |    8.40    6.80    7.48 |  170.61  184.14  194.81 | -393.79 -377.80 -392.61 |  399.64  371.63  394.25
v_f      |   -0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.09    0.09
r_i      |   50.99   42.70   32.40 |  668.74  681.95  751.90 |-1320.88-1314.28-1306.37 | 1307.55 1342.79 1356.35
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.09    0.10
norm

***** Episode 118397, Mean R = -10.1  Std R = 6.4  Min R = -27.5
PolicyLoss: 1.53
Policy_Entropy: 0.248
Policy_KL: 0.00736
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 4.41e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.00132


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0035   5.5684   2.2087   2.1342
ADVA:  (20752,) (34845,) 0.5955517290859521
ADV1:  0.0011035161547201851 0.0004979017608405689 0.007255518128255346 0.04594095386426189 -0.06250120674253906
ADVB:  (21151,) (34845,) 0.6070024393743723
ADV2:  0.1956639209457762 0.39178018167890233 0.5017663400652751 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7530   0.3070   1.2522 108.2148  71.3204  33.5059
***** Episode 118428, Mean R = -10.9  Std R = 5.6  Min R = -25.0
PolicyLoss: 1.83
Policy_Entropy: 0.247
Policy_KL: 0.00752
Policy_SD: 0.537
Steps: 1.15e+04
TotalSteps: 4.41e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.00046


ValFun  Gr

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   8.5042   4.3855  15.8107 108.2148  71.3204  33.5059
***** Episode 118645, Mean R = -9.2  Std R = 4.6  Min R = -21.5
PolicyLoss: 2.36
Policy_Entropy: 0.247
Policy_KL: 0.00993
Policy_SD: 0.533
Steps: 1.19e+04
TotalSteps: 4.42e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.000129


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0030   5.5684   2.2087   2.1342
ADVA:  (20002,) (35231,) 0.567738639266555
ADV1:  0.0 -0.0004602159568203319 0.0060996354761315525 0.05257544984986584 -0.0753930519017239
ADVB:  (17822,) (35231,) 0.5058613153188953
ADV2:  0.01108703310010409 0.3213233542541921 0.4929399177830882 3.0 0.0
 *** BROKE ***   5 0.5422502160072327
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  21.5612  17.3295  52.8861 108.2148  71.3204  33.5059
***** Episode 118676, Mean R = -9.4  Std R = 5.1  Min R = -22.7
PolicyLoss: 1.89
Policy_Entropy: 0.242
Policy_KL: 0.542
Policy_SD: 0.533
Steps

ADVA:  (22080,) (34956,) 0.6316512186749056
ADV1:  0.0009756353232173685 -0.00014961178625890552 0.008028644381696556 0.05262084117002097 -0.06485041329346936
ADVB:  (20760,) (34956,) 0.5938894610367319
ADV2:  0.14755850470308463 0.30408616788952403 0.3961058137050287 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5395   0.2092   0.9483 108.2148  71.3204  33.5059
***** Episode 118893, Mean R = -10.7  Std R = 4.9  Min R = -24.2
PolicyLoss: 1.45
Policy_Entropy: 0.246
Policy_KL: 0.0049
Policy_SD: 0.54
Steps: 1.15e+04
TotalSteps: 4.43e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.000128


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0012   0.0043   5.5684   2.2087   2.1342
ADVA:  (20460,) (34905,) 0.5861624409110443
ADV1:  0.0011825379151130713 0.000331883843155463 0.007931823662240007 0.05262084117002097 -0.07981170259390746
ADVB:  (21782,) (34905,) 0.6240366709640452
ADV2:  0.20397356377648398 0.3632471807555652 0.448558566312494

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0027   5.5684   2.2087   2.1342
ADVA:  (17034,) (34858,) 0.4886683114349647
ADV1:  0.0007072217456698384 0.0007141525222447155 0.006930202677431158 0.048021537025529415 -0.06445588744346847
ADVB:  (21932,) (34858,) 0.6291812496414022
ADV2:  0.29075844029610276 0.5112567516170317 0.6044778293155633 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6976   0.2788   1.3098 108.2148  71.3204  33.5059
***** Episode 119141, Mean R = -9.1  Std R = 6.4  Min R = -35.3
PolicyLoss: 2.32
Policy_Entropy: 0.247
Policy_KL: 0.00791
Policy_SD: 0.533
Steps: 1.15e+04
TotalSteps: 4.44e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.000609


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0014   5.5684   2.2087   2.1342
ADVA:  (16975,) (35038,) 0.4844739996575147
ADV1:  0.000749663673395142 0.0006296319953778552 0.006102834902133141 0.048021537025529415 -0.05386246089526975
ADVB

attitude |   -0.08    0.02   -0.03 |    1.18    0.63    1.85 |   -3.14   -1.56   -3.14 |    3.14    1.47    3.14
w        |    0.00    0.00    0.00 |    0.00    0.00    0.00 |   -0.05   -0.05   -0.05 |    0.05    0.05    0.05
a_f      |    0.01   -0.07 |    0.63    1.85 |   -1.31   -3.13 |    1.40    3.13
w_f      |    0.00    0.00    0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.01 |    0.03    0.02    0.02
w_rewards |    0.00 |    0.00 |    0.00 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.21 |    0.26 |    0.00 |    1.43
seeker_angles |    0.00    0.00 |    0.07    0.07 |   -0.89   -0.98 |    0.89    0.99
cs_angles |  0.0007  0.0007 |  0.0736  0.0736 | -0.8899 -0.9811 |  0.8913  0.9914
optical_flow |  0.0001 -0.0000 |  0.0151  0.0158 | -0.9438 -1.0480 |  0.8532  1.0342
v_err    | -0.0111 |  0.0593 | -0.4529 |  0.1010
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (21620,) (34801,) 0.6212465159047154
ADV1:  0.0012251708345405983 0.00017802753817759806 0.008152244962324888 0.052544249548248545 -0.07030282398846105
ADVB:  (21634,) (34801,) 0.6216488031953105
ADV2:  0.19840640205557525 0.35164455749511975 0.44586250420716506 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6509   0.2406   1.2042 108.2148  71.3204  33.5059
Update Cnt = 3860    ET =   1501.9   Stats:  Mean, Std, Min, Max
r_f      |   -3.23  -24.77   -6.02 |  182.49  181.31  198.52 | -397.88 -390.72 -392.29 |  384.65  378.01  399.91
v_f      |    0.00    0.00    0.00 |    0.04    0.05    0.05 |   -0.10   -0.09   -0.11 |    0.11    0.10    0.11
r_i      |    1.04 -109.41  -13.86 |  647.22  673.46  773.68 |-1279.51-1345.23-1282.43 | 1282.06 1233.53 1335.21
v_i      |    0.00    0.01   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.12 |    0.05 |    0.03 |    0.30
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0016   5.5684   2.2087   2.1342
ADVA:  (19180,) (35277,) 0.543697026391133
ADV1:  0.0001056011614624739 -4.1460660463304936e-05 0.00595380301096386 0.050522992093208596 -0.08037289331278984
ADVB:  (19318,) (35277,) 0.5476089236613091
ADV2:  0.0938578095075075 0.3552630443130688 0.4936002971725908 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9907   0.4465   2.2493 108.2148  71.3204  33.5059
***** Episode 119978, Mean R = -10.6  Std R = 6.5  Min R = -29.8
PolicyLoss: 1.83
Policy_Entropy: 0.248
Policy_KL: 0.00798
Policy_SD: 0.533
Steps: 1.16e+04
TotalSteps: 4.47e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.000448




In [6]:
# Save the parameters of the policy trained by the optimization cell above.
# `fname` is kept as a notebook-level variable so other cells can refer to the
# same checkpoint name.
# NOTE(review): `policy` must already exist in the kernel (presumably created in
# the "Optimize Policy" cell) -- this cell fails on a fresh kernel if run alone.
fname = "optimize_WATTVW_FOV-AR=5-AWR"
# NOTE(review): presumably writes the policy parameters to a location derived
# from `fname`; the exact path/extension and overwrite behaviour are decided
# inside Policy.save_params -- confirm there before re-running over an
# existing checkpoint.
policy.save_params(fname)
