# Test Recurrent Policy with Extreme Parameter Variation 

In [1]:
import numpy as np
import os,sys



sys.path.append('../../../RL_lib/Agents')
sys.path.append('../../../RL_lib/Policies/AWR')
sys.path.append('../../../RL_lib/Policies/Common')
sys.path.append('../../../RL_lib/Utils')
sys.path.append('../../../Env')
sys.path.append('../../../Imaging')


%load_ext autoreload
%load_ext autoreload
%autoreload 2
%matplotlib nbagg
import os
print(os.getcwd())

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/Users/briangaudet/Study/Subjects/MachineLearning/Projects/Asteroid_CPO_seeker-master/Experiments/Extended/Optimize_HF


In [2]:
%%html
<style>
/* Let notebook output areas size to their content, capped at max-height
   (selectors presumably target the classic Notebook output containers). */
.output_wrapper, .output {
    height:auto !important;
    max-height:1000px;  /* your desired max-height here */
}
/* Remove the shadow drawn around scrolling output areas. The vendor-prefixed
   property needs its leading hyphen, otherwise browsers ignore the declaration. */
.output_scroll {
    box-shadow:none !important;
    -webkit-box-shadow:none !important;
}
</style>

# Optimize Policy

In [4]:
from env import Env
import env_utils as envu
from dynamics_model import Dynamics_model
from lander_model import Lander_model
from ic_gen import Landing_icgen
import rl_utils
import attitude_utils as attu
import optics_utils as optu
from arch_policy_vf_vfu import Arch

from policy_awr import Policy
from softmax_pd import Softmax_pd as PD
from value_function import Value_function

import policy_nets as policy_nets
import valfunc_nets as valfunc_nets


from agent import Agent


import torch.nn as nn

from flat_constraint import Flat_constraint
from glideslope_constraint import Glideslope_constraint
from rh_constraint import RH_constraint
from no_attitude_constraint import Attitude_constraint
from w_constraint import W_constraint
from reward_attitude import Reward
from asteroid_hfr import Asteroid

from thruster_model_cubesat import Thruster_model

from sensor import Sensor
from seeker import Seeker

# --- Asteroid, seeker/sensor and lander model construction -------------------
# The statements below run in a fixed order; several constructors print to the
# cell output, so reordering them changes the recorded log.

# landing_site_range is forwarded unchanged to Sensor and Lander_model below.
landing_site_range = 0.0
# None means no landing-site override is handed to Asteroid; a fixed site would
# look like np.asarray([-250.,0.,0.]).
landing_site = None #np.asarray([-250.,0.,0.])

# NOTE(review): omega_range is presumably the sampled range of asteroid
# rotation rates -- confirm meaning and units against Asteroid.
asteroid_model = Asteroid(landing_site_override=landing_site, omega_range=(1e-5,5e-4))

# Quaternion attitude parameterization shared by the seeker, sensor and lander model.
ap = attu.Quaternion_attitude()

# Seeker geometry: C_cb comes from rotate_optical_axis with a final angle of pi,
# r_cb is a zero offset vector, and fov is 90 degrees converted by envu.deg2rad.
# NOTE(review): C_cb / r_cb look like the camera-to-body rotation and position -- confirm.
C_cb = optu.rotate_optical_axis(0.0, 0.0, np.pi)
r_cb = np.asarray([0,0,0])
fov=envu.deg2rad(90)
seeker = Seeker(attitude_parameterization=ap, C_cb=C_cb, r_cb=r_cb,
                radome_slope_bounds=(-0.05,0.05), range_bias=(-0.05,0.05),
                   fov=fov, debug=False)
sensor = Sensor(seeker, attitude_parameterization=ap,  use_range=True, apf_tau1=300, use_dp=False,
                      landing_site_range=landing_site_range,
                      pool_type='max', state_type=Sensor.optflow_state_range_dp1)
# Print the tracking function before and after forcing track_func1. In the
# recorded run both prints show track_func1, so the override may be redundant
# for this Sensor configuration -- TODO confirm before removing it.
print(sensor.track_func)
sensor.track_func = sensor.track_func1
print(sensor.track_func)
logger = rl_utils.Logger()
# NOTE(review): h is presumably the integration step size -- confirm units.
dynamics_model = Dynamics_model(h=2)
thruster_model = Thruster_model(pulsed=True, scale=1.0, offset=0.4)
# The lander model ties together the asteroid, thrusters and sensor; com_range,
# attitude_bias and omega_bias are passed through as given (presumably
# randomization / bias magnitudes -- verify against Lander_model).
lander_model = Lander_model(asteroid_model, thruster_model, attitude_parameterization=ap, sensor=sensor, 
                             landing_site_range=landing_site_range, com_range=(-0.10,0.10),
                              attitude_bias=0.05, omega_bias=0.05)

# Rebind get_state_agent to the sensor/attitude/w variant provided by the model
# (presumably this selects the observation vector the agent receives).
lander_model.get_state_agent = lander_model.get_state_agent_sensor_att_w2

# --- Network dimensions, reward and constraint objects -----------------------

# Observation size fed to the policy and value networks.
# NOTE(review): must match the length of the vector returned by
# lander_model.get_state_agent -- confirm if the observation function changes.
obs_dim = 13
# Each of the 12 action dimensions has 2 discrete choices, giving
# 12 * 2 = 24 logits from the policy network.
action_dim = 12
actions_per_dim = 2
logit_dim = action_dim * actions_per_dim

# Passed as recurrent_steps to both GRU networks and to the Agent later in this cell.
recurrent_steps = 60

# Reward configuration. Apart from landing_coeff every *_coeff is negative, so
# those terms presumably act as penalties -- verify against reward_attitude.Reward.
reward_object = Reward(landing_rlimit=2, landing_vlimit=0.1, 
                       tracking_bias=0.01, fov_coeff=-50., 
                       att_coeff=-0.20,
                       tracking_coeff=-0.5, magv_coeff=-1.0,
                       fuel_coeff=-0.10,  landing_coeff=10.0)

# Constraint objects; all but shape_constraint are handed to Env(...) later in this cell.
glideslope_constraint = Glideslope_constraint(gs_limit=-1.0)
# NOTE(review): shape_constraint is constructed but not passed to Env(...) in
# this cell -- confirm whether that is intentional.
shape_constraint = Flat_constraint()
# Attitude_constraint is imported from no_attitude_constraint (see the imports above).
attitude_constraint = Attitude_constraint(ap)
w_constraint = W_constraint(w_limit=(0.1,0.1,0.1), w_margin=(0.05,0.05,0.05))
rh_constraint = RH_constraint(rh_limit=150)

# --- Initial-condition generator and environment -----------------------------

# Symmetric bound used for the lander_wll / lander_wul limits below.
wi=0.05
# NOTE(review): the first positional argument (800,1000) and the *_error tuples
# are presumably (min, max) sampling ranges -- confirm against Landing_icgen.
ic_gen = Landing_icgen((800,1000), 
                           p_engine_fail=0.5,
                           engine_fail_scale=(0.5,1.0),
                           lander_wll=(-wi,-wi,-wi),
                           lander_wul=(wi,wi,wi),
                           attitude_parameterization=ap,
                           position_error=(0,np.pi/4),
                           heading_error=(0,np.pi/8),
                           attitude_error=(0,np.pi/16),
                           min_mass=450, max_mass=500,
                           mag_v=(0.05,0.1),
                           debug=False,
                           inertia_uncertainty_diag=10.0,
                           inertia_uncertainty_offdiag=1.0)

# Environment wiring: initial conditions, lander and dynamics models, logger,
# reward and the glideslope / attitude / w / rh constraints.
# NOTE(review): tf_limit, print_every and nav_period are passed as given; their
# units are not visible here -- confirm against Env.
env = Env(ic_gen, lander_model, dynamics_model, logger,
          landing_site_range=landing_site_range,
          debug_done=False,
          reward_object=reward_object,
          glideslope_constraint=glideslope_constraint,
          attitude_constraint=attitude_constraint,
          w_constraint=w_constraint,
          rh_constraint=rh_constraint,
          tf_limit=5000.0,print_every=10,nav_period=6)




# Display the initial-condition generator held by the environment.
env.ic_gen.show()

# --- Policy, value function and agent; starts training -----------------------

arch = Arch()

# Active policy (Policy is imported from policy_awr): a GRU1 network mapping
# obs_dim inputs to logit_dim logits, paired with the Softmax_pd distribution
# over action_dim x actions_per_dim discrete choices.
policy = Policy(policy_nets.GRU1(obs_dim, logit_dim, recurrent_steps=recurrent_steps,output_network_scale=5), 
               PD(action_dim, actions_per_dim),
               shuffle=False,
               max_grad_norm=30,
               rollout_limit=3,
               kl_limit=0.5,
               init_func=rl_utils.xn_init)
# Disabled alternative policy configuration (kl_targ / servo_kl variant), kept for reference:
#policy = Policy(policy_nets.GRU1(obs_dim, logit_dim, recurrent_steps=recurrent_steps), 
#                PD(action_dim, actions_per_dim),
#                shuffle=False,
#                kl_targ=0.001,epochs=20, beta=0.1, servo_kl=True, max_grad_norm=30, scale_vector_obs=True,
 #               init_func=rl_utils.xn_init)
# Value function uses its own GRU1 network over the same observation size.
# NOTE(review): batch_size=9999999 presumably forces a single full batch -- confirm.
value_function = Value_function(valfunc_nets.GRU1(obs_dim, recurrent_steps=recurrent_steps), scale_obs=True,
                                shuffle=False, batch_size=9999999, max_grad_norm=30, 
                                verbose=False)

# NOTE(review): the fourth positional argument is None; its meaning is not
# visible here -- confirm against Agent's signature.
agent = Agent(arch, policy, value_function, None, env, logger,
              policy_episodes=30, policy_steps=3000, gamma1=0.95, gamma2=0.995, 
              recurrent_steps=recurrent_steps, monitor=env.rl_stats)
# Long-running call: kicks off training and produces the log output below.
# NOTE(review): 120000 is presumably the number of training episodes -- confirm.
agent.train(120000)

Quaternion_attitude
Euler321 Attitude
C_cb: 
[[ 1.0000000e+00  0.0000000e+00 -0.0000000e+00]
 [ 0.0000000e+00 -1.0000000e+00  1.2246468e-16]
 [ 0.0000000e+00 -1.2246468e-16 -1.0000000e+00]]
[ 0.0000000e+00 -1.2246468e-16 -1.0000000e+00]
using max  pooling
V4: Output State type:  <function Sensor.optflow_state_range_dp1 at 0x1380d8378>
<bound method Sensor.track_func1 of <sensor.Sensor object at 0x145f266d8>>
<bound method Sensor.track_func1 of <sensor.Sensor object at 0x145f266d8>>
6dof dynamics model 
thruster model: 
Inertia Tensor:  [[333.33333333   0.           0.        ]
 [  0.         333.33333333   0.        ]
 [  0.           0.         333.33333333]]
Lander Model: 
Reward_terminal equator
queue fixed
Flat Constraint
Attitude Constraint
Rotational Velocity Constraint
Position Hysterises Constraint


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

lander env RHL
Landing_icgen:
[[-1.]
 [ 1.]]
AWR Policy: 
	xn_init: layer  Linear(in_features=13, out_features=130, bias=True)
	xn_init: layer  GRUCell(130, 124)
	xn_init: layer  Linear(in_features=124, out_features=120, bias=True)
	xn_init: layer  Linear(in_features=120, out_features=24, bias=True)
Policy: recurrent steps > 1, disabling shuffle
	Test Mode:          False
	Shuffle :           False
	Shuffle by Chunks:  False
	Max Grad Norm:      30
	Recurrent Steps:    60
	Rollout Limit:      3
	Advantage Func:     <advantage_utils.Adv_relu object at 0x14a738518>
	Advantage Norm:     <function Adv_normalizer.apply at 0x137571378>
	PD:                 <softmax_pd.Softmax_pd object at 0x14a738e80>
Value Funtion
	xn_init: layer  Linear(in_features=13, out_features=130, bias=True)
	xn_init: layer  GRUCell(130, 25)
	xn_init: layer  Linear(in_features=25, out_features=5, bias=True)
	xn_init: layer  Linear(in_features=5, out_features=1, bias=True)
Value Function: recurrent steps > 1, disablin

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0829   0.0255   0.1488   8.4490   2.3854   3.1146
ADVA:  (6238,) (9265,) 0.6732865623313545
ADV1:  0.15262668865534124 0.09924881370957175 0.9414667251222922 1.5078963967678427 -5.053529177174738
ADVB:  (6242,) (9265,) 0.6737182946573125
ADV2:  0.4196697351503181 0.4419058456507836 0.40674247943106795 1.3864029256013564 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0317   0.0091   0.0464   0.1150   0.0878   0.0183
***** Episode 239, Mean R = -217.2  Std R = 74.2  Min R = -379.5
PolicyLoss: 7.03
Policy_Entropy: 0.000359
Policy_KL: 0.00364
Policy_SD: 0.957
Steps: 3.15e+03
TotalSteps: 2.14e+04
VF_0_ExplainedVarNew: 0.0631
VF_0_ExplainedVarOld: 0.0436
VF_0_Loss : 0.835


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2692   0.0949   0.4304   8.4490   2.3854   3.1146
ADVA:  (6719,) (9993,) 0.6723706594616231
ADV1:  0.18223613601932498 0.030568000221225633 0.9335989720347015 1.234570090314122 -5.053529177174738
ADVB:  (681

***** Episode 456, Mean R = -333.6  Std R = 206.4  Min R = -1110.1
PolicyLoss: 5.25
Policy_Entropy: 0.0004
Policy_KL: 0.00387
Policy_SD: 0.955
Steps: 6.6e+03
TotalSteps: 5.84e+04
VF_0_ExplainedVarNew: 0.0214
VF_0_ExplainedVarOld: -0.0149
VF_0_Loss : 0.427


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0823   0.0485   0.1959   8.4490   2.3854   3.1146
ADVA:  (14141,) (18793,) 0.752461022721226
ADV1:  0.189195684559177 0.04437373261775518 0.6493942868669252 1.6707871218085004 -4.20503074254046
ADVB:  (13616,) (18793,) 0.724525089128931
ADV2:  0.28327572021778313 0.32177681967686017 0.30155966437374915 2.4053828948211327 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0238   0.0096   0.0396   0.1150   0.0878   0.0183
***** Episode 487, Mean R = -345.6  Std R = 184.0  Min R = -885.6
PolicyLoss: 4.73
Policy_Entropy: 0.000407
Policy_KL: 0.00478
Policy_SD: 0.956
Steps: 7.07e+03
TotalSteps: 6.54e+04
VF_0_ExplainedVarNew: 0.0946
VF_0_ExplainedVarOld: 0.0764
VF_0_Loss : 0.336




Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0205   0.0076   0.0367   0.1150   0.0878   0.0183
***** Episode 704, Mean R = -483.7  Std R = 173.4  Min R = -1091.3
PolicyLoss: 5.22
Policy_Entropy: 0.000448
Policy_KL: 0.00347
Policy_SD: 0.955
Steps: 1.15e+04
TotalSteps: 1.35e+05
VF_0_ExplainedVarNew: 0.523
VF_0_ExplainedVarOld: 0.468
VF_0_Loss : 0.0758


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0296   0.0132   0.0606   8.4490   2.3854   3.1146
ADVA:  (20225,) (33553,) 0.6027776949900158
ADV1:  0.020649871340619574 -0.0023465036489025938 0.26972899973286235 1.7534134670080779 -4.048988688285824
ADVB:  (20038,) (33553,) 0.5972044228533961
ADV2:  0.13838163678594678 0.29204697196927354 0.3699977863492343 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0225   0.0089   0.0403   0.1150   0.0878   0.0183
***** Episode 735, Mean R = -461.9  Std R = 184.5  Min R = -998.7
PolicyLoss: 5.17
Policy_Entropy: 0.000459
Policy_KL: 0.00467
Policy_SD: 0.955
Steps: 1.09e+04
T

attitude |    0.01   -0.01    0.01 |    1.80    0.69    1.82 |   -3.14   -1.56   -3.14 |    3.14    1.57    3.14
w        |   -0.00    0.00    0.00 |    0.02    0.03    0.02 |   -0.10   -0.10   -0.10 |    0.10    0.10    0.10
a_f      |   -0.03   -0.05 |    0.67    1.86 |   -1.46   -3.14 |    1.46    3.14
w_f      |   -0.01    0.00    0.00 |    0.03    0.03    0.03 |   -0.10   -0.10   -0.10 |    0.10    0.10    0.10
w_rewards |   -4.55 |    4.42 |  -27.13 |    0.00
w_penalty |   -4.84 |   21.46 | -100.00 |    0.00
fov_penalty |  -15.32 |   23.05 |  -50.00 |    0.00
theta_cv |    1.54 |    0.69 |    0.00 |    3.13
seeker_angles |    0.02    0.02 |    0.32    0.31 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0240  0.0214 |  0.3195  0.3095 | -0.9995 -0.9998 |  0.9999  0.9999
optical_flow |  0.0002  0.0001 |  0.0020  0.0020 | -0.0178 -0.0130 |  0.0362  0.0274
v_err    | -0.4669 |  0.1700 | -1.1085 |  0.8266
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |  904

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0580   0.0227   0.1238   0.1238   0.0878   0.0227
Update Cnt = 40    ET =   1224.6   Stats:  Mean, Std, Min, Max
r_f      |   14.95   10.85  -34.35 |  560.22  534.25  646.15 |-1386.59-1521.73-1374.18 | 1336.46 1346.20 1373.46
v_f      |   -0.00    0.01    0.00 |    0.42    0.43    0.42 |   -1.17   -1.29   -0.94 |    1.23    1.15    1.17
r_i      |   13.61  -11.40  -51.10 |  671.00  645.27  780.77 |-1322.56-1372.95-1361.55 | 1314.63 1256.56 1259.40
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |  749.65 |  284.51 |   80.95 | 1302.39
norm_vf  |    0.69 |    0.25 |    0.12 |    1.53
gs_f     |    0.98 |    1.16 |    0.02 |    9.65
thrust   |   -0.00    0.00    0.00 |    0.99    1.00    0.99 |   -3.44   -3.44   -3.36 |    3.41    3.46    3.46
norm_thrust |    1.59 |    0.65 |    0.00 |    3.46
fuel     |    5.36 |    1.90 |    1.91 |   13.86
rewards  | -380.20 | 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0468   0.0141   0.0829   0.1405   0.0878   0.0254
***** Episode 1541, Mean R = -209.1  Std R = 39.2  Min R = -330.7
PolicyLoss: 5.31
Policy_Entropy: 0.00105
Policy_KL: 0.00894
Policy_SD: 0.955
Steps: 7.68e+03
TotalSteps: 3.99e+05
VF_0_ExplainedVarNew: 0.86
VF_0_ExplainedVarOld: 0.825
VF_0_Loss : 0.0942


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0515   0.0229   0.0930   8.4490   2.3854   3.1146
ADVA:  (14035,) (22554,) 0.622284295468653
ADV1:  0.0 -0.001273185325519089 0.16129047084162376 1.85415509223938 -1.1862321821848574
ADVB:  (9904,) (22554,) 0.4391238804646626
ADV2:  0.0 0.24212159778339468 0.46229663468848264 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1038   0.0423   0.2181   0.2181   0.1038   0.0423
***** Episode 1572, Mean R = -206.7  Std R = 39.7  Min R = -351.7
PolicyLoss: 5.48
Policy_Entropy: 0.00108
Policy_KL: 0.013
Policy_SD: 0.956
Steps: 7.28e+03
TotalSteps: 4.06e+05
VF_0_ExplainedVarNew: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0815   0.0279   0.1568   0.2181   0.1038   0.0423
***** Episode 1789, Mean R = -187.7  Std R = 38.1  Min R = -330.9
PolicyLoss: 5.56
Policy_Entropy: 0.00132
Policy_KL: 0.00982
Policy_SD: 0.955
Steps: 7.86e+03
TotalSteps: 4.6e+05
VF_0_ExplainedVarNew: 0.891
VF_0_ExplainedVarOld: 0.878
VF_0_Loss : 0.0746


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0381   0.0156   0.0719   8.4490   2.3854   3.1146
ADVA:  (12577,) (22818,) 0.5511876588658077
ADV1:  0.0 0.001914944055494794 0.1424279297204135 2.486826089223226 -0.9732469451515255
ADVB:  (12629,) (22818,) 0.5534665614865457
ADV2:  0.06911163113148627 0.2834270253536189 0.4858601375712672 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0557   0.0196   0.1040   0.2181   0.1038   0.0423
***** Episode 1820, Mean R = -175.0  Std R = 23.3  Min R = -266.4
PolicyLoss: 5.02
Policy_Entropy: 0.00139
Policy_KL: 0.0105
Policy_SD: 0.955
Steps: 7.39e+03
TotalSteps: 4.67e+05
VF_0_E

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0901   0.0313   0.1782   0.2181   0.1038   0.0423
***** Episode 2037, Mean R = -173.7  Std R = 26.3  Min R = -223.4
PolicyLoss: 5.41
Policy_Entropy: 0.00183
Policy_KL: 0.00981
Policy_SD: 0.956
Steps: 8.34e+03
TotalSteps: 5.24e+05
VF_0_ExplainedVarNew: 0.905
VF_0_ExplainedVarOld: 0.893
VF_0_Loss : 0.0657


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0375   0.0187   0.1009   8.4490   2.3854   3.1146
ADVA:  (13855,) (25405,) 0.5453650856130683
ADV1:  0.005657762175054426 0.011189181860143142 0.13409239191425104 2.231442272098253 -1.1215185546875022
ADVB:  (14683,) (25405,) 0.5779570950600276
ADV2:  0.08501727084728106 0.30104980747873417 0.5582543755196059 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0733   0.0244   0.1249   0.2181   0.1038   0.0423
***** Episode 2068, Mean R = -180.3  Std R = 33.5  Min R = -299.5
PolicyLoss: 4.98
Policy_Entropy: 0.00188
Policy_KL: 0.0103
Policy_SD: 0.955
Steps: 8.72e+03
TotalSt

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0723   0.0263   0.1523   0.2199   0.1142   0.0445
***** Episode 2285, Mean R = -153.4  Std R = 17.6  Min R = -207.8
PolicyLoss: 3.67
Policy_Entropy: 0.00224
Policy_KL: 0.0103
Policy_SD: 0.951
Steps: 7.83e+03
TotalSteps: 5.9e+05
VF_0_ExplainedVarNew: 0.952
VF_0_ExplainedVarOld: 0.938
VF_0_Loss : 0.057


Dynamics: Max Disturbance (m/s^2):  [0.00121875 0.00114861 0.00112701] 0.0020186136790816874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0530   0.0198   0.1077   8.4490   2.3854   3.1146
ADVA:  (13887,) (23747,) 0.5847896576409651
ADV1:  0.0 0.004162792717993237 0.1456885657756098 2.0303072611490887 -4.540982731112083
ADVB:  (13030,) (23747,) 0.5487008885332885
ADV2:  0.04232234488457511 0.25447359467610675 0.4776520562162997 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1123   0.0407   0.2175   0.2199   0.1142   0.0445
***** Episode 2316, Mean R = -161.8  Std R = 27.9  Min R = -252.9
PolicyLoss: 4.37
Policy_Ent

v_err    | -0.0521 |  0.1342 | -0.5330 |  0.6703
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |  287.75 |  149.23 |    0.60 |  920.47
tracking_rewards |  -97.96 |   22.88 | -183.90 |   -9.19
steps    |     262 |      51 |      12 |     381
***** Episode 2533, Mean R = -152.4  Std R = 23.2  Min R = -225.5
PolicyLoss: 3.85
Policy_Entropy: 0.00312
Policy_KL: 0.0153
Policy_SD: 0.951
Steps: 8.12e+03
TotalSteps: 6.55e+05
VF_0_ExplainedVarNew: 0.868
VF_0_ExplainedVarOld: 0.835
VF_0_Loss : 0.0592


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0513   0.0237   0.0969   8.4490   2.3854   3.1146
ADVA:  (13721,) (24585,) 0.5581045352857433
ADV1:  0.0 -0.0013462010881698354 0.16814120614101652 2.0950281302134197 -4.486221525681514
ADVB:  (13855,) (24585,) 0.5635550132194428
ADV2:  0.059777871561526426 0.2437255145225443 0.4698528194001958 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1573   0.0618   0.2507   0.2763   0.1657   0.0621
***** Episode 2564

attitude |   -0.06   -0.02   -0.13 |    1.31    0.66    1.90 |   -3.14   -1.56   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.02    0.02 |   -0.07   -0.10   -0.07 |    0.09    0.10    0.09
a_f      |   -0.07   -0.03 |    0.64    1.91 |   -1.52   -3.13 |    1.30    3.11
w_f      |    0.00    0.00    0.00 |    0.01    0.02    0.01 |   -0.03   -0.05   -0.05 |    0.06    0.10    0.04
w_rewards |   -0.90 |    0.61 |   -3.29 |   -0.01
w_penalty |   -2.26 |   14.86 | -100.00 |    0.00
fov_penalty |  -45.16 |   14.78 |  -50.00 |    0.00
theta_cv |    0.62 |    0.35 |    0.00 |    2.97
seeker_angles |   -0.13   -0.05 |    0.34    0.30 |   -1.00   -1.00 |    1.00    1.00
cs_angles | -0.1264 -0.0536 |  0.3414  0.3012 | -1.0000 -1.0000 |  0.9997  0.9998
optical_flow | -0.0022 -0.0020 |  0.0065  0.0066 | -0.0690 -0.1020 |  0.0738  0.1379
v_err    | -0.0449 |  0.1292 | -0.6042 |  1.0223
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |  295

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2075   0.0839   0.3632   0.3955   0.2219   0.0863
Update Cnt = 100    ET =   1003.3   Stats:  Mean, Std, Min, Max
r_f      |   75.69  -33.66    5.41 |  298.12  288.41  327.65 | -771.42 -720.22 -760.49 |  698.59  826.50  921.95
v_f      |   -0.01   -0.06   -0.04 |    0.39    0.43    0.48 |   -1.31   -1.13   -1.24 |    1.17    1.24    0.99
r_i      |   19.32   -8.28   40.51 |  636.03  670.61  786.82 |-1315.32-1246.29-1354.25 | 1237.73 1334.74 1293.04
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.08   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |  263.15 |  122.75 |   30.76 |  818.13
norm_vf  |    0.72 |    0.24 |    0.19 |    1.73
gs_f     |    1.20 |    1.67 |    0.02 |   20.96
thrust   |   -0.00   -0.02   -0.01 |    0.97    0.95    0.96 |   -3.42   -3.40   -3.45 |    3.30    3.39    3.45
norm_thrust |    1.53 |    0.65 |    0.00 |    3.46
fuel     |    3.85 |    0.77 |    0.33 |    6.53
rewards  | -149.94 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2222   0.0815   0.4043   0.4043   0.2222   0.0863
***** Episode 3401, Mean R = -143.4  Std R = 18.6  Min R = -184.5
PolicyLoss: 3.7
Policy_Entropy: 0.00533
Policy_KL: 0.0113
Policy_SD: 0.941
Steps: 8.74e+03
TotalSteps: 8.88e+05
VF_0_ExplainedVarNew: 0.87
VF_0_ExplainedVarOld: 0.836
VF_0_Loss : 0.0601


Dynamics: Max Disturbance (m/s^2):  [0.00121875 0.00114861 0.00112701] 0.0020186136790816874
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0469   0.0171   0.0959   8.4490   2.3854   3.1146
ADVA:  (15907,) (26834,) 0.5927927256465678
ADV1:  0.000709269228141074 0.002717705875768556 0.16285475430009763 2.1645468957343965 -1.6524359607696555
ADVB:  (13372,) (26834,) 0.4983230230304837
ADV2:  0.0 0.21764152329512618 0.49215928383966 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2066   0.0853   0.3592   0.4043   0.2222   0.0863
***** Episode 3432, Mean R = -150.3  Std R = 24.8  Min R = -205.5
PolicyLoss: 3.69
Policy_En

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2333   0.0808   0.4064   0.5713   0.3181   0.1292
***** Episode 3649, Mean R = -156.8  Std R = 25.8  Min R = -232.1
PolicyLoss: 3.92
Policy_Entropy: 0.00668
Policy_KL: 0.0152
Policy_SD: 0.939
Steps: 9.21e+03
TotalSteps: 9.6e+05
VF_0_ExplainedVarNew: 0.85
VF_0_ExplainedVarOld: 0.82
VF_0_Loss : 0.0666


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0354   0.0142   0.0618   8.4490   2.3854   3.1146
ADVA:  (12230,) (27002,) 0.45292941263610104
ADV1:  0.0 0.005734699851218123 0.17321048581689846 2.310588000615438 -1.3232317348160225
ADVB:  (13705,) (27002,) 0.5075549959262277
ADV2:  0.006140905129013225 0.25162071473473485 0.5722388048543172 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2315   0.0817   0.3989   0.5713   0.3181   0.1292
***** Episode 3680, Mean R = -144.9  Std R = 21.5  Min R = -196.8
PolicyLoss: 4.04
Policy_Entropy: 0.00673
Policy_KL: 0.0165
Policy_SD: 0.939
Steps: 9.31e+03
TotalSteps: 9.69e+05
VF_0_

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1828   0.0628   0.3279   0.5713   0.3181   0.1292
***** Episode 3897, Mean R = -145.1  Std R = 20.6  Min R = -209.9
PolicyLoss: 4.04
Policy_Entropy: 0.00791
Policy_KL: 0.0117
Policy_SD: 0.94
Steps: 8.97e+03
TotalSteps: 1.03e+06
VF_0_ExplainedVarNew: 0.882
VF_0_ExplainedVarOld: 0.837
VF_0_Loss : 0.0695


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0211   0.0041   0.0285   8.4490   2.3854   3.1146
ADVA:  (14329,) (27380,) 0.5233382030679328
ADV1:  0.0 0.005507126571856556 0.17717578868044198 1.9421772638956705 -4.705848205401446
ADVB:  (14459,) (27380,) 0.5280861943024105
ADV2:  0.022094541595128095 0.24296247213014047 0.5220214620248461 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1811   0.0710   0.3264   0.5713   0.3181   0.1292
***** Episode 3928, Mean R = -142.4  Std R = 17.0  Min R = -184.5
PolicyLoss: 3.65
Policy_Entropy: 0.0081
Policy_KL: 0.0131
Policy_SD: 0.94
Steps: 8.96e+03
TotalSteps: 1.04e+06
VF_0_E

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1297   0.0457   0.2546   0.6404   0.3181   0.1422
***** Episode 4145, Mean R = -152.3  Std R = 17.8  Min R = -194.0
PolicyLoss: 3.2
Policy_Entropy: 0.00859
Policy_KL: 0.0219
Policy_SD: 0.942
Steps: 9.62e+03
TotalSteps: 1.11e+06
VF_0_ExplainedVarNew: 0.919
VF_0_ExplainedVarOld: 0.906
VF_0_Loss : 0.0485


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0365   0.0152   0.0727   8.4490   2.3854   3.1146
ADVA:  (17388,) (29176,) 0.5959692898272553
ADV1:  0.0 0.0030925117981853074 0.1506333494918656 2.3968546962738038 -1.4766983715693178
ADVB:  (12577,) (29176,) 0.43107348505621057
ADV2:  0.0 0.22281586428172004 0.5271025133697617 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1954   0.0634   0.3114   0.6404   0.3181   0.1422
***** Episode 4176, Mean R = -161.3  Std R = 38.8  Min R = -306.8
PolicyLoss: 4.02
Policy_Entropy: 0.00902
Policy_KL: 0.0193
Policy_SD: 0.94
Steps: 1.01e+04
TotalSteps: 1.12e+06
VF_0_ExplainedVarNew

***** Episode 4393, Mean R = -148.0  Std R = 15.8  Min R = -174.8
PolicyLoss: 3.65
Policy_Entropy: 0.0104
Policy_KL: 0.0106
Policy_SD: 0.939
Steps: 9.63e+03
TotalSteps: 1.19e+06
VF_0_ExplainedVarNew: 0.871
VF_0_ExplainedVarOld: 0.843
VF_0_Loss : 0.0646


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0248   0.0077   0.0447   8.4490   2.3854   3.1146
ADVA:  (15554,) (28624,) 0.5433901621017329
ADV1:  0.0 0.0003697479540391373 0.16037345684984217 2.079011885325114 -4.055131383859147
ADVB:  (15189,) (28624,) 0.5306386249301286
ADV2:  0.023218235455500696 0.2372752725549332 0.49548473331181364 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2822   0.1143   0.5551   0.9411   0.4579   0.2310
***** Episode 4424, Mean R = -152.8  Std R = 24.8  Min R = -228.1
PolicyLoss: 3.43
Policy_Entropy: 0.0107
Policy_KL: 0.0109
Policy_SD: 0.939
Steps: 9.50e+03
TotalSteps: 1.2e+06
VF_0_ExplainedVarNew: 0.845
VF_0_ExplainedVarOld: 0.812
VF_0_Loss : 0.0639


Dynamics: Max Disturbance (m/s^2

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.08   -0.10   -0.05 |    0.10    0.10    0.10
a_f      |    0.05    0.03 |    0.67    1.94 |   -1.46   -3.12 |    1.49    3.10
w_f      |   -0.00    0.00   -0.00 |    0.01    0.02    0.01 |   -0.04   -0.06   -0.04 |    0.10    0.10    0.10
w_rewards |   -0.84 |    0.52 |   -3.35 |   -0.04
w_penalty |   -1.61 |   12.60 | -100.00 |    0.00
fov_penalty |  -44.84 |   15.21 |  -50.00 |    0.00
theta_cv |    0.69 |    0.36 |    0.00 |    2.78
seeker_angles |   -0.05   -0.02 |    0.31    0.31 |   -1.00   -1.00 |    1.00    1.00
cs_angles | -0.0521 -0.0198 |  0.3058  0.3091 | -1.0000 -0.9998 |  0.9999  0.9995
optical_flow | -0.0000  0.0000 |  0.0124  0.0131 | -0.6979 -0.4040 |  0.3016  0.3205
v_err    | -0.0302 |  0.0899 | -0.7638 |  0.6199
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |  136.75 |  145.43 |    4.17 | 1078.52
tracking_rewards |  -87.13 |   21.34 | -163.96 |  -10.36
steps    |     314 |  

Update Cnt = 160    ET =   1243.6   Stats:  Mean, Std, Min, Max
r_f      |   16.18   22.23   -4.78 |  234.27  229.78  266.44 | -743.45 -523.51 -782.87 |  675.47 1078.23  596.94
v_f      |   -0.02    0.04   -0.01 |    0.40    0.39    0.40 |   -1.20   -0.96   -1.06 |    0.99    1.21    1.40
r_i      |   -6.57   53.95    8.63 |  663.09  664.35  771.66 |-1311.11-1213.94-1328.07 | 1321.19 1332.65 1355.59
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.09
norm_rf  |  133.12 |  104.65 |    4.96 |  950.10
norm_vf  |    0.64 |    0.25 |    0.17 |    1.46
gs_f     |    1.28 |    5.80 |    0.02 |  100.00
thrust   |   -0.00    0.01    0.00 |    1.00    1.00    0.99 |   -3.45   -3.44   -3.46 |    3.36    3.45    3.44
norm_thrust |    1.59 |    0.67 |    0.00 |    3.46
fuel     |    4.11 |    0.57 |    0.22 |    5.34
rewards  | -145.25 |   23.42 | -219.84 |  -64.49
fuel_rewards |  -11.75 |    1.64 |  -15.28 |   -0.63
glideslope_rewards | 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1780   0.0667   0.3062   3.9819   1.7380   0.9725
***** Episode 5261, Mean R = -139.6  Std R = 22.4  Min R = -184.1
PolicyLoss: 2.63
Policy_Entropy: 0.0159
Policy_KL: 0.0108
Policy_SD: 0.93
Steps: 9.21e+03
TotalSteps: 1.46e+06
VF_0_ExplainedVarNew: 0.888
VF_0_ExplainedVarOld: 0.843
VF_0_Loss : 0.0499


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0219   0.0095   0.0457   8.4490   2.3854   3.1146
ADVA:  (18974,) (28951,) 0.6553832337397671
ADV1:  0.0 -0.002152685737774799 0.16484351043973897 2.491474984486898 -1.7409200994173708
ADVB:  (9757,) (28951,) 0.33701771959517807
ADV2:  0.0 0.1500633276601376 0.4429653304952558 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5607   0.2493   1.0690   3.9819   1.7380   0.9725
***** Episode 5292, Mean R = -143.5  Std R = 17.9  Min R = -185.5
PolicyLoss: 3.12
Policy_Entropy: 0.016
Policy_KL: 0.0149
Policy_SD: 0.928
Steps: 9.98e+03
TotalSteps: 1.47e+06
VF_0_ExplainedVarNew: 0.

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4354   0.2273   0.9080   3.9819   1.7380   0.9725
***** Episode 5509, Mean R = -140.7  Std R = 26.5  Min R = -218.8
PolicyLoss: 3.17
Policy_Entropy: 0.0173
Policy_KL: 0.00786
Policy_SD: 0.925
Steps: 1.01e+04
TotalSteps: 1.54e+06
VF_0_ExplainedVarNew: 0.89
VF_0_ExplainedVarOld: 0.839
VF_0_Loss : 0.0453


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0413   0.0145   0.0743   8.4490   2.3854   3.1146
ADVA:  (15353,) (29914,) 0.5132379487865214
ADV1:  0.004205101567563568 0.007831219040305669 0.15646936466019457 2.447710959116618 -1.6888616819304758
ADVB:  (18250,) (29914,) 0.6100822357424617
ADV2:  0.0855690532664275 0.27411836480899865 0.522891804641031 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0481   0.6028   2.3107   3.9819   1.7380   0.9725
***** Episode 5540, Mean R = -145.6  Std R = 18.0  Min R = -188.6
PolicyLoss: 3.17
Policy_Entropy: 0.0173
Policy_KL: 0.00977
Policy_SD: 0.925
Steps: 9.91e+03
TotalSteps:

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4086   0.1740   0.8378   3.9819   1.7380   0.9725
***** Episode 5757, Mean R = -143.2  Std R = 26.1  Min R = -182.2
PolicyLoss: 2.89
Policy_Entropy: 0.0194
Policy_KL: 0.015
Policy_SD: 0.925
Steps: 9.91e+03
TotalSteps: 1.62e+06
VF_0_ExplainedVarNew: 0.865
VF_0_ExplainedVarOld: 0.827
VF_0_Loss : 0.0488


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0222   0.0084   0.0406   8.4490   2.3854   3.1146
ADVA:  (18883,) (30390,) 0.6213557091148404
ADV1:  0.0 0.004404804496809584 0.165599343662333 2.547238883972168 -1.4186305427551291
ADVB:  (12436,) (30390,) 0.4092135570911484
ADV2:  0.0 0.17591066320966253 0.47734690703397414 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2955   0.1225   0.5758   3.9819   1.7380   0.9725
***** Episode 5788, Mean R = -138.0  Std R = 27.3  Min R = -188.9
PolicyLoss: 2.89
Policy_Entropy: 0.0194
Policy_KL: 0.017
Policy_SD: 0.924
Steps: 1.02e+04
TotalSteps: 1.63e+06
VF_0_ExplainedVarNew: 0.8

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1878   0.0676   0.3687   3.9819   1.7380   0.9725
***** Episode 6005, Mean R = -140.5  Std R = 21.1  Min R = -191.1
PolicyLoss: 2.92
Policy_Entropy: 0.0201
Policy_KL: 0.00808
Policy_SD: 0.919
Steps: 1.02e+04
TotalSteps: 1.7e+06
VF_0_ExplainedVarNew: 0.861
VF_0_ExplainedVarOld: 0.828
VF_0_Loss : 0.0492


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0242   0.0091   0.0442   8.4490   2.3854   3.1146
ADVA:  (14591,) (30849,) 0.47298129599014554
ADV1:  0.0 -0.0005201152039479122 0.16698884481523935 2.4013506351424025 -1.4487322298685732
ADVB:  (15070,) (30849,) 0.48850854160588675
ADV2:  0.0 0.22400228316973386 0.5000926528118639 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.2151   1.3775   5.5901   5.5901   2.2151   1.3775
***** Episode 6036, Mean R = -142.2  Std R = 18.9  Min R = -185.6
PolicyLoss: 3.1
Policy_Entropy: 0.0202
Policy_KL: 0.013
Policy_SD: 0.919
Steps: 1.02e+04
TotalSteps: 1.71e+06
VF_0_ExplainedVarNe

***** Episode 6253, Mean R = -134.7  Std R = 28.0  Min R = -183.9
PolicyLoss: 3.09
Policy_Entropy: 0.0215
Policy_KL: 0.0132
Policy_SD: 0.914
Steps: 1.05e+04
TotalSteps: 1.78e+06
VF_0_ExplainedVarNew: 0.795
VF_0_ExplainedVarOld: 0.75
VF_0_Loss : 0.0556


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0454   0.0164   0.0735   8.4490   2.3854   3.1146
ADVA:  (18213,) (31570,) 0.5769084573962623
ADV1:  0.0084183140482893 0.004478958493288912 0.1763727199065376 2.333195743560791 -1.6546635643641174
ADVB:  (18816,) (31570,) 0.5960088691796009
ADV2:  0.05438582814688094 0.19546701261189559 0.45203887323139746 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4341   0.1209   0.6577   5.5901   2.2361   1.3775
***** Episode 6284, Mean R = -132.1  Std R = 24.7  Min R = -196.3
PolicyLoss: 2.2
Policy_Entropy: 0.0214
Policy_KL: 0.00758
Policy_SD: 0.913
Steps: 1.06e+04
TotalSteps: 1.79e+06
VF_0_ExplainedVarNew: 0.844
VF_0_ExplainedVarOld: 0.781
VF_0_Loss : 0.0518


Dynamics: Max Dist

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.09   -0.05 |    0.08    0.10    0.09
a_f      |   -0.03    0.09 |    0.66    1.90 |   -1.33   -3.12 |    1.41    3.14
w_f      |    0.00   -0.00   -0.00 |    0.01    0.02    0.01 |   -0.07   -0.05   -0.04 |    0.04    0.06    0.03
w_rewards |   -0.88 |    0.42 |   -2.46 |   -0.03
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |  -44.84 |   15.21 |  -50.00 |    0.00
theta_cv |    0.67 |    0.33 |    0.00 |    2.91
seeker_angles |   -0.01   -0.00 |    0.26    0.31 |   -1.00   -1.00 |    1.00    1.00
cs_angles | -0.0148 -0.0029 |  0.2597  0.3079 | -1.0000 -0.9999 |  0.9994  0.9999
optical_flow | -0.0001 -0.0001 |  0.0122  0.0177 | -0.4982 -0.4370 |  0.3706  0.7969
v_err    | -0.0252 |  0.0782 | -0.4697 |  0.5428
landing_rewards |    0.00 |    0.00 |    0.00 |    0.00
landing_margin |   82.38 |   81.81 |    0.09 |  409.24
tracking_rewards |  -81.16 |   18.33 | -160.68 |  -42.63
steps    |     335 |  

Update Cnt = 220    ET =   1162.9   Stats:  Mean, Std, Min, Max
r_f      |    6.18  -17.63   -7.01 |  211.99  190.50  236.61 | -622.24 -664.48 -756.58 |  569.57  409.57  659.68
v_f      |   -0.02   -0.03    0.03 |    0.32    0.33    0.32 |   -1.24   -1.11   -0.87 |    0.92    1.14    1.31
r_i      |   32.32  -38.74  -41.18 |  708.43  637.97  749.32 |-1362.91-1281.78-1337.14 | 1300.72 1289.32 1366.30
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.10    0.10    0.10
norm_rf  |   81.30 |   78.90 |    0.62 |  478.97
norm_vf  |    0.50 |    0.25 |    0.09 |    1.73
gs_f     |    1.05 |    1.95 |    0.02 |   22.06
thrust   |   -0.01   -0.01    0.01 |    0.97    0.98    0.96 |   -3.46   -3.36   -3.43 |    3.46    3.40    3.43
norm_thrust |    1.54 |    0.68 |    0.00 |    3.46
fuel     |    3.83 |    0.39 |    2.13 |    4.81
rewards  | -132.11 |   25.53 | -204.24 |  -50.68
fuel_rewards |  -10.97 |    1.12 |  -13.77 |   -6.10
glideslope_rewards | 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1751   0.0550   0.3256  14.9510   4.9607   4.3800
***** Episode 7121, Mean R = -131.7  Std R = 30.4  Min R = -182.9
PolicyLoss: 2.24
Policy_Entropy: 0.0245
Policy_KL: 0.00943
Policy_SD: 0.909
Steps: 1.06e+04
TotalSteps: 2.08e+06
VF_0_ExplainedVarNew: 0.822
VF_0_ExplainedVarOld: 0.794
VF_0_Loss : 0.0564


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0410   0.0184   0.0738   8.4490   2.3854   3.1146
ADVA:  (15061,) (31737,) 0.4745565113274727
ADV1:  0.0 -0.007331918972273427 0.1691849706284423 2.146161584854126 -1.3923883597857571
ADVB:  (18400,) (31737,) 0.5797649431263194
ADV2:  0.04916277507742456 0.20471912832854983 0.4695213850890209 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1957   0.0790   0.4204  14.9510   4.9607   4.3800
***** Episode 7152, Mean R = -139.8  Std R = 16.5  Min R = -175.6
PolicyLoss: 2.31
Policy_Entropy: 0.0248
Policy_KL: 0.00987
Policy_SD: 0.908
Steps: 1.05e+04
TotalSteps: 2.09e+06
VF_0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8683   0.4986   1.9654  14.9510   4.9607   4.3800
***** Episode 7369, Mean R = -141.5  Std R = 22.3  Min R = -190.1
PolicyLoss: 2.55
Policy_Entropy: 0.0263
Policy_KL: 0.00632
Policy_SD: 0.906
Steps: 1.03e+04
TotalSteps: 2.16e+06
VF_0_ExplainedVarNew: 0.857
VF_0_ExplainedVarOld: 0.811
VF_0_Loss : 0.0563


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0560   0.0249   0.0981   8.4490   2.3854   3.1146
ADVA:  (17713,) (31855,) 0.5560508554387067
ADV1:  0.0 -0.00213832939681539 0.18167102283574962 2.454957125981649 -1.6261987702051819
ADVB:  (15505,) (31855,) 0.486736776016324
ADV2:  0.0 0.19745295270788993 0.4828656379880977 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9873   0.5501   2.0542  14.9510   4.9607   4.3800
***** Episode 7400, Mean R = -125.4  Std R = 31.8  Min R = -201.9
PolicyLoss: 2.6
Policy_Entropy: 0.0262
Policy_KL: 0.00997
Policy_SD: 0.906
Steps: 1.07e+04
TotalSteps: 2.17e+06
VF_0_ExplainedVarNew: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.0908   0.8710   3.9458  46.1133  14.3235  11.3691
***** Episode 7617, Mean R = -129.4  Std R = 27.3  Min R = -178.5
PolicyLoss: 2.62
Policy_Entropy: 0.0269
Policy_KL: 0.0385
Policy_SD: 0.898
Steps: 1.06e+04
TotalSteps: 2.25e+06
VF_0_ExplainedVarNew: 0.757
VF_0_ExplainedVarOld: 0.69
VF_0_Loss : 0.0584


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0277   0.0086   0.0514   8.4490   2.3854   3.1146
ADVA:  (17979,) (32087,) 0.5603203789696762
ADV1:  0.0 0.002106585539727966 0.19131607415845922 1.987209186402999 -1.949792774518333
ADVB:  (16766,) (32087,) 0.5225169071586624
ADV2:  0.012352621955121426 0.20737432955935667 0.5140087902024838 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6458   0.3142   1.3849  46.1133  14.3235  11.3691
***** Episode 7648, Mean R = -125.5  Std R = 29.5  Min R = -176.7
PolicyLoss: 2.52
Policy_Entropy: 0.0273
Policy_KL: 0.0213
Policy_SD: 0.896
Steps: 1.07e+04
TotalSteps: 2.26e+06
VF_0_Ex

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2766   0.1162   0.6232  46.1133  14.3235  11.3691
***** Episode 7865, Mean R = -129.1  Std R = 27.9  Min R = -173.9
PolicyLoss: 2.5
Policy_Entropy: 0.0273
Policy_KL: 0.00615
Policy_SD: 0.889
Steps: 1.08e+04
TotalSteps: 2.33e+06
VF_0_ExplainedVarNew: 0.725
VF_0_ExplainedVarOld: 0.66
VF_0_Loss : 0.0647


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0449   0.0111   0.0627   8.4490   2.3854   3.1146
ADVA:  (16363,) (32297,) 0.506641483729139
ADV1:  0.006939318705548633 0.009171349011813395 0.1973231461025316 2.0745179097947792 -1.7377331280708335
ADVB:  (20254,) (32297,) 0.6271170696968759
ADV2:  0.07019320959998836 0.23959431831542213 0.5267444147463236 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2039   0.0661   0.3452  46.1133  14.3235  11.3691
***** Episode 7896, Mean R = -116.7  Std R = 32.1  Min R = -173.1
PolicyLoss: 2.44
Policy_Entropy: 0.0274
Policy_KL: 0.00426
Policy_SD: 0.89
Steps: 1.08e+04
TotalSteps: 

v_err    | -0.0196 |  0.0735 | -0.5114 |  0.5701
landing_rewards |    0.26 |    1.59 |    0.00 |   10.00
landing_margin |   40.87 |   55.83 |   -0.06 |  360.24
tracking_rewards |  -79.39 |   19.50 | -174.92 |  -40.27
steps    |     352 |      32 |     185 |     418
***** Episode 8113, Mean R = -122.0  Std R = 42.0  Min R = -239.2
PolicyLoss: 2.66
Policy_Entropy: 0.0271
Policy_KL: 0.00624
Policy_SD: 0.878
Steps: 1.07e+04
TotalSteps: 2.42e+06
VF_0_ExplainedVarNew: 0.702
VF_0_ExplainedVarOld: 0.652
VF_0_Loss : 0.0614


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0432   0.0223   0.0931   8.4490   2.3854   3.1146
ADVA:  (19314,) (32461,) 0.5949909121715289
ADV1:  0.0 -0.010083388678221325 0.20746693855222934 2.0868735406841505 -2.288335087299349
ADVB:  (15414,) (32461,) 0.4748467391639198
ADV2:  0.0 0.1744835975628907 0.4759787233851051 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1897   0.0536   0.3166  46.1133  14.3235  11.3691
***** Episode 8144, Mean R = -127.9 

theta_cv |    0.63 |    0.32 |    0.00 |    3.03
seeker_angles |   -0.02    0.01 |    0.26    0.29 |   -1.00   -1.00 |    1.00    1.00
cs_angles | -0.0248  0.0072 |  0.2601  0.2896 | -0.9979 -0.9998 |  0.9998  0.9998
optical_flow |  0.0000  0.0000 |  0.0167  0.0208 | -0.9427 -0.7465 |  0.9965  1.0098
v_err    | -0.0204 |  0.0735 | -0.5281 |  0.4800
landing_rewards |    0.16 |    1.26 |    0.00 |   10.00
landing_margin |   46.77 |   64.47 |   -0.05 |  387.24
tracking_rewards |  -79.55 |   18.87 | -148.27 |  -45.18
steps    |     351 |      32 |     220 |     417
***** Episode 8423, Mean R = -129.9  Std R = 25.8  Min R = -180.5
PolicyLoss: 2.15
Policy_Entropy: 0.0292
Policy_KL: 0.00686
Policy_SD: 0.873
Steps: 1.08e+04
TotalSteps: 2.53e+06
VF_0_ExplainedVarNew: 0.821
VF_0_ExplainedVarOld: 0.797
VF_0_Loss : 0.0517


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0188   0.0053   0.0294   8.4490   2.3854   3.1146
ADVA:  (21060,) (32601,) 0.6459924542191957
ADV1:  0.0 -0.005404401556030

attitude |   -0.02   -0.04   -0.11 |    1.14    0.64    1.80 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.10   -0.05 |    0.08    0.09    0.09
a_f      |   -0.04   -0.08 |    0.65    1.81 |   -1.47   -3.12 |    1.45    3.13
w_f      |    0.00   -0.00   -0.00 |    0.02    0.02    0.02 |   -0.06   -0.08   -0.05 |    0.05    0.08    0.04
w_rewards |   -0.76 |    0.41 |   -2.78 |   -0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |  -32.26 |   23.92 |  -50.00 |    0.00
theta_cv |    0.63 |    0.32 |    0.00 |    3.08
seeker_angles |   -0.03   -0.00 |    0.27    0.28 |   -1.00   -1.00 |    1.00    1.00
cs_angles | -0.0258 -0.0037 |  0.2696  0.2796 | -0.9984 -0.9997 |  0.9999  0.9999
optical_flow | -0.0004 -0.0002 |  0.0184  0.0203 | -1.0201 -0.7076 |  1.2298  0.8665
v_err    | -0.0213 |  0.0741 | -0.5100 |  0.5308
landing_rewards |    0.13 |    1.13 |    0.00 |   10.00
landing_margin |   39

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2776   0.0933   0.4682  46.1133  14.3235  11.3691
Update Cnt = 290    ET =   1339.8   Stats:  Mean, Std, Min, Max
r_f      |  -17.97    1.46   12.31 |  202.65  180.92  218.09 | -454.51 -629.50 -392.10 |  564.24  413.98  801.85
v_f      |    0.02   -0.04   -0.01 |    0.24    0.26    0.23 |   -0.85   -0.96   -0.65 |    0.68    0.89    0.73
r_i      |  -61.35   26.62   34.98 |  679.87  657.82  780.32 |-1325.85-1321.77-1325.50 | 1297.32 1289.52 1304.91
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.09    0.10
norm_rf  |   38.92 |   57.88 |    0.34 |  447.08
norm_vf  |    0.37 |    0.21 |    0.03 |    1.21
gs_f     |    0.92 |    1.44 |    0.03 |   19.78
thrust   |    0.00   -0.01   -0.01 |    0.93    0.96    0.93 |   -3.45   -3.32   -3.34 |    3.46    3.42    3.36
norm_thrust |    1.49 |    0.67 |    0.00 |    3.46
fuel     |    3.56 |    0.33 |    2.40 |    4.54
rewards  | -119.75 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7339   0.3352   1.6229  46.1133  14.3235  11.3691
***** Episode 9291, Mean R = -125.1  Std R = 36.7  Min R = -193.6
PolicyLoss: 3.03
Policy_Entropy: 0.0316
Policy_KL: 0.00785
Policy_SD: 0.86
Steps: 1.11e+04
TotalSteps: 2.84e+06
VF_0_ExplainedVarNew: 0.65
VF_0_ExplainedVarOld: 0.471
VF_0_Loss : 0.0513


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0370   0.0127   0.0647   8.4490   2.3854   3.1146
ADVA:  (19210,) (33416,) 0.5748743117069667
ADV1:  0.0 -0.006440586491710745 0.2141613524429214 2.4493604278564454 -2.227632285753888
ADVB:  (18311,) (33416,) 0.5479710318410342
ADV2:  0.027887660399184628 0.20907635169223177 0.485478188346185 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4011   0.1839   0.9089  46.1133  14.3235  11.3691
***** Episode 9322, Mean R = -111.5  Std R = 35.2  Min R = -170.7
PolicyLoss: 2.39
Policy_Entropy: 0.0314
Policy_KL: 0.0076
Policy_SD: 0.863
Steps: 1.11e+04
TotalSteps: 2.85e+06
VF_0_Ex

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3088   0.1188   0.5791  46.1133  14.3235  11.3691
***** Episode 9539, Mean R = -114.3  Std R = 39.8  Min R = -175.6
PolicyLoss: 2.18
Policy_Entropy: 0.0319
Policy_KL: 0.0107
Policy_SD: 0.859
Steps: 1.05e+04
TotalSteps: 2.93e+06
VF_0_ExplainedVarNew: 0.754
VF_0_ExplainedVarOld: 0.679
VF_0_Loss : 0.0481


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0266   0.0154   0.0661   8.4490   2.3854   3.1146
ADVA:  (20238,) (32751,) 0.6179353302189247
ADV1:  0.004113036040687898 -0.007259953413258768 0.19682188632634123 1.7915339152018228 -4.551945992551545
ADVB:  (17234,) (32751,) 0.5262129400628989
ADV2:  0.020318120690197 0.1940294863477059 0.4147067398802011 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3777   0.1413   0.6378  46.1133  14.3235  11.3691
***** Episode 9570, Mean R = -106.5  Std R = 37.3  Min R = -180.6
PolicyLoss: 2.32
Policy_Entropy: 0.0315
Policy_KL: 0.011
Policy_SD: 0.858
Steps: 1.14e+04
TotalSteps: 2

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1414   0.0442   0.2299  46.1133  14.3235  11.3691
***** Episode 9787, Mean R = -117.3  Std R = 37.6  Min R = -171.4
PolicyLoss: 2.06
Policy_Entropy: 0.0322
Policy_KL: 0.00676
Policy_SD: 0.855
Steps: 1.11e+04
TotalSteps: 3.01e+06
VF_0_ExplainedVarNew: 0.738
VF_0_ExplainedVarOld: 0.711
VF_0_Loss : 0.0369


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0148   0.0063   0.0292   8.4490   2.3854   3.1146
ADVA:  (18971,) (33424,) 0.5675861656294878
ADV1:  0.0 -0.007728069980161442 0.15362059002155587 1.9619242212373922 -2.0698841831805805
ADVB:  (19080,) (33424,) 0.5708472953566299
ADV2:  0.04224424943200855 0.18106219141983804 0.3967632342651442 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1591   0.0590   0.3047  46.1133  14.3235  11.3691
***** Episode 9818, Mean R = -107.6  Std R = 34.3  Min R = -157.9
PolicyLoss: 1.97
Policy_Entropy: 0.0321
Policy_KL: 0.00705
Policy_SD: 0.855
Steps: 1.1e+04
TotalSteps: 3.03e+06
VF_

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4104   0.1330   0.6642  46.1133  14.3235  11.3691
***** Episode 10035, Mean R = -105.3  Std R = 37.5  Min R = -178.7
PolicyLoss: 2.59
Policy_Entropy: 0.0339
Policy_KL: 0.0103
Policy_SD: 0.853
Steps: 1.12e+04
TotalSteps: 3.1e+06
VF_0_ExplainedVarNew: 0.698
VF_0_ExplainedVarOld: 0.588
VF_0_Loss : 0.036


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0198   0.0078   0.0321   8.4490   2.3854   3.1146
ADVA:  (17203,) (33834,) 0.5084530354081693
ADV1:  0.0 -0.0014668291182957753 0.1501358229959705 2.1573418362935386 -1.9882722812130798
ADVB:  (18347,) (33834,) 0.5422651770408465
ADV2:  0.023144067431458828 0.20057110116618027 0.4515948099851673 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2581   0.1029   0.5326  46.1133  14.3235  11.3691
***** Episode 10066, Mean R = -94.2  Std R = 38.3  Min R = -178.5
PolicyLoss: 2.24
Policy_Entropy: 0.0337
Policy_KL: 0.0108
Policy_SD: 0.853
Steps: 1.15e+04
TotalSteps: 3.11e+06
VF_0

***** Episode 10283, Mean R = -98.1  Std R = 40.0  Min R = -182.3
PolicyLoss: 1.98
Policy_Entropy: 0.0343
Policy_KL: 0.012
Policy_SD: 0.854
Steps: 1.13e+04
TotalSteps: 3.19e+06
VF_0_ExplainedVarNew: 0.388
VF_0_ExplainedVarOld: 0.28
VF_0_Loss : 0.0725


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00133359] 0.0023073689045181906
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0200   0.0048   0.0286   8.4490   2.3854   3.1146
ADVA:  (19210,) (33773,) 0.5687975601812099
ADV1:  0.0 -0.004077054731234135 0.18525332548795892 1.9652387276575911 -5.272046402152459
ADVB:  (19113,) (33773,) 0.5659254434015337
ADV2:  0.03412951242247261 0.15575378104998605 0.36776377133372534 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3996   0.1596   0.7643  46.1133  14.3235  11.3691
***** Episode 10314, Mean R = -93.8  Std R = 36.8  Min R = -155.4
PolicyLoss: 1.69
Policy_Entropy: 0.0346
Policy_KL: 0.00944
Policy_SD: 0.854
Steps: 1.11e+04
TotalSteps: 3.2e+06
VF_0_ExplainedVa

seeker_angles |   -0.02    0.02 |    0.21    0.22 |   -1.00   -1.00 |    1.00    1.00
cs_angles | -0.0201  0.0235 |  0.2060  0.2230 | -0.9976 -0.9984 |  0.9999  0.9997
optical_flow |  0.0002  0.0003 |  0.0217  0.0257 | -1.1268 -0.9565 |  1.1230  1.0960
v_err    | -0.0114 |  0.0730 | -0.5060 |  0.4593
landing_rewards |    1.45 |    3.52 |    0.00 |   10.00
landing_margin |   16.92 |   82.87 |   -0.08 |  957.87
tracking_rewards |  -67.91 |   18.22 | -131.40 |  -12.98
steps    |     365 |      38 |      19 |     421
***** Episode 10593, Mean R = -89.9  Std R = 32.9  Min R = -165.3
PolicyLoss: 1.94
Policy_Entropy: 0.0352
Policy_KL: 0.00854
Policy_SD: 0.845
Steps: 1.15e+04
TotalSteps: 3.31e+06
VF_0_ExplainedVarNew: 0.359
VF_0_ExplainedVarOld: 0.244
VF_0_Loss : 0.0512


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0169   0.0058   0.0292   8.4490   2.3854   3.1146
ADVA:  (21367,) (34504,) 0.619261534894505
ADV1:  0.0030738276891182983 0.0070091567810491055 0.15238215016024412 2.184810

attitude |    0.08   -0.00   -0.14 |    1.10    0.64    1.73 |   -3.14   -1.56   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.08   -0.09   -0.07 |    0.09    0.09    0.09
a_f      |    0.02    0.04 |    0.66    1.85 |   -1.43   -3.14 |    1.46    3.14
w_f      |    0.00   -0.00   -0.00 |    0.03    0.03    0.03 |   -0.07   -0.07   -0.07 |    0.06    0.09    0.06
w_rewards |   -0.73 |    0.43 |   -3.83 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |   -7.58 |   17.93 |  -50.00 |    0.00
theta_cv |    0.54 |    0.30 |    0.00 |    2.99
seeker_angles |   -0.02    0.01 |    0.20    0.21 |   -0.99   -1.00 |    1.00    1.00
cs_angles | -0.0163  0.0133 |  0.2029  0.2129 | -0.9933 -0.9990 |  0.9997  0.9997
optical_flow |  0.0003  0.0003 |  0.0235  0.0255 | -0.9759 -0.9902 |  0.8578  0.8655
v_err    | -0.0115 |  0.0726 | -0.5052 |  0.3967
landing_rewards |    1.97 |    3.98 |    0.00 |   10.00
landing_margin |    5

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4885   0.2408   1.0326  46.1133  14.3235  11.3691
Update Cnt = 360    ET =   1329.3   Stats:  Mean, Std, Min, Max
r_f      |   -1.16   -3.10   17.54 |  191.97  156.63  206.13 | -394.19 -355.56 -410.65 |  401.11  389.97  376.59
v_f      |   -0.00   -0.01    0.00 |    0.12    0.13    0.12 |   -0.61   -0.63   -0.54 |    0.46    0.81    0.77
r_i      |  -21.76   13.69   53.01 |  699.85  629.52  769.91 |-1302.09-1283.34-1345.41 | 1348.19 1298.09 1322.66
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    5.50 |   12.47 |    0.18 |  136.88
norm_vf  |    0.15 |    0.14 |    0.03 |    0.97
gs_f     |    0.95 |    0.96 |    0.04 |    6.66
thrust   |    0.00   -0.01   -0.00 |    0.91    0.90    0.89 |   -3.40   -3.43   -3.45 |    3.41    3.44    3.45
norm_thrust |    1.40 |    0.69 |    0.00 |    3.46
fuel     |    3.31 |    0.31 |    2.56 |    4.29
rewards  |  -78.04 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2527   0.1021   0.5171  46.1133  14.3235  11.3691
***** Episode 11461, Mean R = -72.3  Std R = 27.9  Min R = -170.8
PolicyLoss: 2.1
Policy_Entropy: 0.0407
Policy_KL: 0.0114
Policy_SD: 0.819
Steps: 1.16e+04
TotalSteps: 3.63e+06
VF_0_ExplainedVarNew: 0.888
VF_0_ExplainedVarOld: 0.843
VF_0_Loss : 0.0139


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0096   0.0033   0.0146   8.4490   2.3854   3.1146
ADVA:  (17420,) (34876,) 0.4994838857667164
ADV1:  0.0 -0.007275754517170867 0.09205361267943105 1.191636503122563 -2.2899373384711765
ADVB:  (18791,) (34876,) 0.5387945865351531
ADV2:  0.031386480151997416 0.19258606406902268 0.3625601773604059 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3444   0.1459   0.6127  46.1133  14.3235  11.3691
***** Episode 11492, Mean R = -76.2  Std R = 29.4  Min R = -151.3
PolicyLoss: 2.09
Policy_Entropy: 0.0403
Policy_KL: 0.00749
Policy_SD: 0.819
Steps: 1.16e+04
TotalSteps: 3.64e+06
VF_0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1813   0.0724   0.3975  46.1133  14.3235  11.3691
***** Episode 11709, Mean R = -67.5  Std R = 29.6  Min R = -178.9
PolicyLoss: 1.91
Policy_Entropy: 0.0433
Policy_KL: 0.00913
Policy_SD: 0.812
Steps: 1.16e+04
TotalSteps: 3.72e+06
VF_0_ExplainedVarNew: 0.857
VF_0_ExplainedVarOld: 0.851
VF_0_Loss : 0.0133


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0138   0.0068   0.0285   8.4490   2.3854   3.1146
ADVA:  (17869,) (34753,) 0.5141714384369694
ADV1:  0.0 -0.0009112469450141713 0.08756584049479248 2.0924422550323634 -1.9497222053992052
ADVB:  (17527,) (34753,) 0.5043305613903836
ADV2:  0.0035167014081651294 0.18456923023949565 0.35969493460474017 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1881   0.0602   0.3085  46.1133  14.3235  11.3691
***** Episode 11740, Mean R = -69.0  Std R = 34.3  Min R = -166.8
PolicyLoss: 2.08
Policy_Entropy: 0.0437
Policy_KL: 0.00721
Policy_SD: 0.81
Steps: 1.15e+04
TotalSteps: 3.73e+06

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2623   0.0853   0.4765  46.1133  14.3235  11.3691
***** Episode 11957, Mean R = -67.1  Std R = 29.0  Min R = -179.5
PolicyLoss: 2.4
Policy_Entropy: 0.0459
Policy_KL: 0.0081
Policy_SD: 0.798
Steps: 1.17e+04
TotalSteps: 3.81e+06
VF_0_ExplainedVarNew: 0.725
VF_0_ExplainedVarOld: 0.635
VF_0_Loss : 0.015


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0048   0.0023   0.0085   8.4490   2.3854   3.1146
ADVA:  (19074,) (35192,) 0.5419981814048648
ADV1:  0.0 2.6320674488988814e-06 0.07311589366375758 1.9561046346028645 -1.9314159998489053
ADVB:  (17688,) (35192,) 0.5026142305069334
ADV2:  0.0029870644792458203 0.1990408189219697 0.3891616567968128 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2374   0.0830   0.3989  46.1133  14.3235  11.3691
***** Episode 11988, Mean R = -59.7  Std R = 18.3  Min R = -119.1
PolicyLoss: 2.24
Policy_Entropy: 0.0466
Policy_KL: 0.01
Policy_SD: 0.802
Steps: 1.18e+04
TotalSteps: 3.83e+06
VF_0_E

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7396   0.4528   1.7528  46.1133  14.3235  11.3691
***** Episode 12205, Mean R = -60.2  Std R = 19.0  Min R = -128.3
PolicyLoss: 3.25
Policy_Entropy: 0.0486
Policy_KL: 0.00868
Policy_SD: 0.788
Steps: 1.15e+04
TotalSteps: 3.91e+06
VF_0_ExplainedVarNew: 0.846
VF_0_ExplainedVarOld: 0.823
VF_0_Loss : 0.0108


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0019   0.0079   8.4490   2.3854   3.1146
ADVA:  (18393,) (35077,) 0.5243606921914645
ADV1:  0.0 0.000672164742254678 0.041557739129566014 1.0383143901984757 -0.33944927659382
ADVB:  (16333,) (35077,) 0.46563275080537103
ADV2:  0.0 0.30856809880561703 0.5630400148645953 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3531   0.1274   0.6612  46.1133  14.3235  11.3691
***** Episode 12236, Mean R = -56.6  Std R = 13.0  Min R = -97.1
PolicyLoss: 3.71
Policy_Entropy: 0.0488
Policy_KL: 0.00814
Policy_SD: 0.786
Steps: 1.16e+04
TotalSteps: 3.92e+06
VF_0_ExplainedVarNew

***** Episode 12453, Mean R = -56.9  Std R = 18.1  Min R = -131.4
PolicyLoss: 3.52
Policy_Entropy: 0.0499
Policy_KL: 0.011
Policy_SD: 0.78
Steps: 1.15e+04
TotalSteps: 4e+06
VF_0_ExplainedVarNew: 0.879
VF_0_ExplainedVarOld: 0.849
VF_0_Loss : 0.00798


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0067   0.0037   0.0153   8.4490   2.3854   3.1146
ADVA:  (16515,) (34576,) 0.4776434521055067
ADV1:  0.0 -0.0017043081351690932 0.04408262896323179 0.5862875537338954 -1.1516117935312775
ADVB:  (17678,) (34576,) 0.5112795002313744
ADV2:  0.019115106006654704 0.30333752744290926 0.4941975270046243 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5645   0.9884   3.4362  46.1133  14.3235  11.3691
***** Episode 12484, Mean R = -54.9  Std R = 22.2  Min R = -154.4
PolicyLoss: 3.32
Policy_Entropy: 0.0501
Policy_KL: 0.00812
Policy_SD: 0.774
Steps: 1.16e+04
TotalSteps: 4.01e+06
VF_0_ExplainedVarNew: 0.901
VF_0_ExplainedVarOld: 0.896
VF_0_Loss : 0.00892


ValFun  Gradients: u/sd/Max/C 

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.10   -0.06 |    0.08    0.10    0.08
a_f      |   -0.04    0.06 |    0.70    1.82 |   -1.40   -3.14 |    1.45    3.10
w_f      |   -0.00   -0.00   -0.01 |    0.03    0.03    0.02 |   -0.07   -0.08   -0.06 |    0.06    0.10    0.05
w_rewards |   -0.58 |    0.37 |   -2.35 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |   -0.81 |    6.30 |  -50.00 |    0.00
theta_cv |    0.47 |    0.30 |    0.00 |    2.96
seeker_angles |    0.01    0.00 |    0.15    0.15 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0128  0.0030 |  0.1456  0.1462 | -0.9959 -0.9970 |  0.9999  0.9964
optical_flow |  0.0004  0.0006 |  0.0201  0.0289 | -0.7021 -0.9882 |  0.7348  1.0757
v_err    | -0.0114 |  0.0675 | -0.5010 |  0.2534
landing_rewards |    3.48 |    4.76 |    0.00 |   10.00
landing_margin |    0.61 |    3.77 |   -0.07 |   54.00
tracking_rewards |  -49.69 |   15.49 | -119.48 |  -25.44
steps    |     375 |  

Update Cnt = 420    ET =   1320.1   Stats:  Mean, Std, Min, Max
r_f      |  -11.98    5.15   24.71 |  182.44  165.86  203.69 | -394.74 -383.69 -365.38 |  396.50  393.12  411.17
v_f      |   -0.00    0.00   -0.00 |    0.07    0.06    0.07 |   -0.62   -0.19   -0.21 |    0.15    0.19    0.17
r_i      |  -39.06    0.05   64.65 |  686.05  643.06  778.07 |-1321.30-1330.82-1296.25 | 1337.93 1334.96 1314.20
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.10    0.10
norm_rf  |    1.54 |    2.59 |    0.05 |   44.50
norm_vf  |    0.10 |    0.05 |    0.02 |    0.66
gs_f     |    1.40 |    4.16 |    0.02 |   64.55
thrust   |    0.00    0.01   -0.00 |    0.85    0.82    0.83 |   -3.42   -3.37   -3.46 |    3.44    3.43    3.46
norm_thrust |    1.26 |    0.70 |    0.00 |    3.46
fuel     |    2.70 |    0.28 |    1.97 |    3.87
rewards  |  -50.40 |   14.89 | -167.03 |  -22.60
fuel_rewards |   -7.72 |    0.79 |  -11.07 |   -5.65
glideslope_rewards | 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3875   0.1557   0.6964  46.1133  14.3235  11.3691
***** Episode 13321, Mean R = -48.0  Std R = 13.7  Min R = -73.5
PolicyLoss: 3.49
Policy_Entropy: 0.062
Policy_KL: 0.00859
Policy_SD: 0.732
Steps: 1.14e+04
TotalSteps: 4.33e+06
VF_0_ExplainedVarNew: 0.923
VF_0_ExplainedVarOld: 0.913
VF_0_Loss : 0.00866


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0033   8.4490   2.3854   3.1146
ADVA:  (15218,) (34524,) 0.4407948094079481
ADV1:  0.0 0.0005113823364248232 0.0244074638796326 0.2206731746557436 -0.28639330736496293
ADVB:  (18846,) (34524,) 0.545881126173097
ADV2:  0.0877302111054446 0.4071171546948826 0.5902228468723311 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4177   0.1886   0.8795  46.1133  14.3235  11.3691
***** Episode 13352, Mean R = -43.2  Std R = 9.3  Min R = -69.0
PolicyLoss: 3.94
Policy_Entropy: 0.0619
Policy_KL: 0.00648
Policy_SD: 0.725
Steps: 1.15e+04
TotalSteps: 4.34e+06
VF_0_Exp

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2135   0.0723   0.4075  46.1133  14.3235  11.3691
***** Episode 13569, Mean R = -49.9  Std R = 14.6  Min R = -79.3
PolicyLoss: 3.11
Policy_Entropy: 0.0652
Policy_KL: 0.013
Policy_SD: 0.723
Steps: 1.18e+04
TotalSteps: 4.42e+06
VF_0_ExplainedVarNew: 0.834
VF_0_ExplainedVarOld: 0.825
VF_0_Loss : 0.0114


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0020   0.0079   8.4490   2.3854   3.1146
ADVA:  (18448,) (35258,) 0.5232287707754268
ADV1:  0.0 6.297502156586757e-05 0.029431135643623475 0.34114348333595557 -0.4339372952980879
ADVB:  (17495,) (35258,) 0.49619944409779343
ADV2:  0.0 0.31384482893541776 0.5041808019949807 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3907   0.1424   0.6787  46.1133  14.3235  11.3691
***** Episode 13600, Mean R = -45.9  Std R = 10.7  Min R = -67.9
PolicyLoss: 3.22
Policy_Entropy: 0.0663
Policy_KL: 0.0118
Policy_SD: 0.723
Steps: 1.18e+04
TotalSteps: 4.43e+06
VF_0_ExplainedVarNew

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3841   0.1307   0.6445  46.1133  14.3235  11.3691
***** Episode 13817, Mean R = -39.6  Std R = 10.6  Min R = -74.7
PolicyLoss: 3.77
Policy_Entropy: 0.0693
Policy_KL: 0.00957
Policy_SD: 0.71
Steps: 1.17e+04
TotalSteps: 4.51e+06
VF_0_ExplainedVarNew: 0.927
VF_0_ExplainedVarOld: 0.924
VF_0_Loss : 0.00842


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0010   0.0037   8.4490   2.3854   3.1146
ADVA:  (17790,) (35263,) 0.5044947962453563
ADV1:  0.0004241486896087096 0.0022409154300244132 0.022828864832696423 0.20693060108532868 -0.2813132604480733
ADVB:  (18633,) (35263,) 0.5284008734367467
ADV2:  0.05522876009983363 0.388563806281591 0.5697653267781193 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2132   0.0665   0.3366  46.1133  14.3235  11.3691
***** Episode 13848, Mean R = -46.5  Std R = 13.9  Min R = -84.1
PolicyLoss: 3.75
Policy_Entropy: 0.0697
Policy_KL: 0.00677
Policy_SD: 0.714
Steps: 1.18e+04
TotalSt

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3433   0.1195   0.6162  46.1133  14.3235  11.3691
***** Episode 14065, Mean R = -42.2  Std R = 12.5  Min R = -72.3
PolicyLoss: 3.39
Policy_Entropy: 0.0729
Policy_KL: 0.00778
Policy_SD: 0.704
Steps: 1.16e+04
TotalSteps: 4.61e+06
VF_0_ExplainedVarNew: 0.912
VF_0_ExplainedVarOld: 0.908
VF_0_Loss : 0.00954


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0018   0.0072   8.4490   2.3854   3.1146
ADVA:  (17869,) (34730,) 0.5145119493233515
ADV1:  0.00030659854615435606 0.0015430857298579367 0.022136344872671212 0.19624336083730065 -0.33533175126349524
ADVB:  (18734,) (34730,) 0.5394183702850561
ADV2:  0.0882900278977856 0.3932595855516618 0.5555530927683404 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2644   0.1022   0.4814  46.1133  14.3235  11.3691
***** Episode 14096, Mean R = -41.4  Std R = 12.1  Min R = -82.4
PolicyLoss: 3.67
Policy_Entropy: 0.0733
Policy_KL: 0.0103
Policy_SD: 0.7
Steps: 1.15e+04
TotalSt

***** Episode 14313, Mean R = -42.4  Std R = 10.3  Min R = -63.3
PolicyLoss: 3.63
Policy_Entropy: 0.0766
Policy_KL: 0.00767
Policy_SD: 0.698
Steps: 1.16e+04
TotalSteps: 4.7e+06
VF_0_ExplainedVarNew: 0.916
VF_0_ExplainedVarOld: 0.91
VF_0_Loss : 0.0104


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0004   0.0019   8.4490   2.3854   3.1146
ADVA:  (17421,) (34874,) 0.4995412054825945
ADV1:  0.0 0.001024909503593983 0.028726398913661112 0.26245747314397766 -0.710894586245221
ADVB:  (18557,) (34874,) 0.5321156162183862
ADV2:  0.066922383977535 0.38356731279899486 0.5501399242099828 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6940   0.3231   1.3581  46.1133  14.3235  11.3691
***** Episode 14344, Mean R = -40.4  Std R = 14.9  Min R = -100.3
PolicyLoss: 3.59
Policy_Entropy: 0.0766
Policy_KL: 0.00806
Policy_SD: 0.693
Steps: 1.17e+04
TotalSteps: 4.71e+06
VF_0_ExplainedVarNew: 0.928
VF_0_ExplainedVarOld: 0.92
VF_0_Loss : 0.00962


ValFun  Gradients: u/sd/Max/C Max

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.09   -0.08   -0.06 |    0.08    0.07    0.08
a_f      |   -0.04    0.01 |    0.65    1.84 |   -1.45   -3.14 |    1.49    3.12
w_f      |    0.00   -0.00   -0.01 |    0.03    0.03    0.02 |   -0.05   -0.06   -0.06 |    0.08    0.07    0.05
w_rewards |   -0.51 |    0.33 |   -3.01 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.40 |    0.30 |    0.00 |    3.05
seeker_angles |    0.01    0.00 |    0.11    0.12 |   -0.97   -0.99 |    0.98    1.00
cs_angles |  0.0086  0.0020 |  0.1121  0.1198 | -0.9748 -0.9944 |  0.9813  0.9970
optical_flow |  0.0003  0.0003 |  0.0214  0.0314 | -0.9559 -1.3794 |  0.9527  0.9338
v_err    | -0.0108 |  0.0623 | -0.5002 |  0.1825
landing_rewards |    5.42 |    4.98 |    0.00 |   10.00
landing_margin |    0.03 |    0.17 |   -0.07 |    1.64
tracking_rewards |  -38.71 |    9.73 |  -88.75 |  -22.90
steps    |     376 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4327   0.1445   0.6736  46.1133  14.3235  11.3691
Update Cnt = 480    ET =   1242.2   Stats:  Mean, Std, Min, Max
r_f      |   -6.11  -10.99   -0.61 |  192.23  170.00  203.34 | -394.54 -387.40 -380.04 |  394.52  379.97  400.98
v_f      |    0.00    0.00   -0.00 |    0.06    0.06    0.06 |   -0.15   -0.14   -0.13 |    0.17    0.14    0.12
r_i      |  -39.49  -55.90   14.00 |  693.09  659.18  757.16 |-1307.71-1380.36-1268.41 | 1339.08 1299.31 1245.89
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.94 |    0.49 |    0.12 |    2.53
norm_vf  |    0.09 |    0.03 |    0.02 |    0.23
gs_f     |    1.22 |    2.04 |    0.01 |   26.47
thrust   |    0.00    0.00    0.00 |    0.80    0.80    0.78 |   -3.46   -3.45   -3.41 |    3.46    3.45    3.46
norm_thrust |    1.16 |    0.73 |    0.00 |    3.46
fuel     |    2.22 |    0.34 |    1.59 |    3.95
rewards  |  -39.62 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2583   0.0951   0.4877  46.1133  14.3235  11.3691
***** Episode 15181, Mean R = -35.9  Std R = 13.8  Min R = -72.8
PolicyLoss: 3.4
Policy_Entropy: 0.0913
Policy_KL: 0.00742
Policy_SD: 0.651
Steps: 1.15e+04
TotalSteps: 5.02e+06
VF_0_ExplainedVarNew: 0.945
VF_0_ExplainedVarOld: 0.941
VF_0_Loss : 0.00764


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0003   0.0014   8.4490   2.3854   3.1146
ADVA:  (18928,) (34518,) 0.5483515846804565
ADV1:  0.0010760150517446845 0.0007488660909989072 0.019260646733606814 0.16592013164694241 -0.21914048518278798
ADVB:  (18887,) (34518,) 0.547163798597833
ADV2:  0.10990906649745222 0.3859810611142607 0.5320270250713932 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4315   0.1637   0.8351  46.1133  14.3235  11.3691
***** Episode 15212, Mean R = -38.4  Std R = 12.5  Min R = -74.4
PolicyLoss: 3.29
Policy_Entropy: 0.0917
Policy_KL: 0.00688
Policy_SD: 0.651
Steps: 1.15e+04
TotalS

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4729   0.2412   0.9738  46.1133  14.3235  11.3691
***** Episode 15429, Mean R = -37.6  Std R = 12.8  Min R = -60.8
PolicyLoss: 2.74
Policy_Entropy: 0.0947
Policy_KL: 0.00447
Policy_SD: 0.639
Steps: 1.17e+04
TotalSteps: 5.12e+06
VF_0_ExplainedVarNew: 0.897
VF_0_ExplainedVarOld: 0.892
VF_0_Loss : 0.00915


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0045   0.0028   0.0113   8.4490   2.3854   3.1146
ADVA:  (19566,) (35372,) 0.5531493836933168
ADV1:  0.0014856540783151836 0.0004779509641073876 0.023319457361006234 0.14277549729353364 -0.3848756595939159
ADVB:  (19954,) (35372,) 0.5641185118172566
ADV2:  0.12018883399249043 0.328854203240211 0.44307045704373665 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2782   0.1351   0.5741  46.1133  14.3235  11.3691
***** Episode 15460, Mean R = -34.2  Std R = 11.6  Min R = -84.4
PolicyLoss: 2.67
Policy_Entropy: 0.0956
Policy_KL: 0.00467
Policy_SD: 0.637
Steps: 1.18e+04
Total

ADVA:  (19915,) (35474,) 0.5613970795512206
ADV1:  0.0014781804399049449 4.234704641672516e-05 0.017316587740817495 0.13451148786935152 -0.16376391588835354
ADVB:  (20536,) (35474,) 0.5789028584315273
ADV2:  0.1823555448625266 0.3873961271764542 0.5089248877576797 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2405   0.0866   0.3868  46.1133  14.3235  11.3691
***** Episode 15677, Mean R = -32.0  Std R = 12.8  Min R = -84.4
PolicyLoss: 3.01
Policy_Entropy: 0.101
Policy_KL: 0.00438
Policy_SD: 0.619
Steps: 1.19e+04
TotalSteps: 5.21e+06
VF_0_ExplainedVarNew: 0.954
VF_0_ExplainedVarOld: 0.95
VF_0_Loss : 0.0067


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0006   0.0029   8.4490   2.3854   3.1146
ADVA:  (19648,) (35254,) 0.5573268281613434
ADV1:  0.0018382505376530883 0.00025205056692639616 0.01761552781360163 0.13451148786935152 -0.18655815886110724
ADVB:  (20768,) (35254,) 0.5890962727633744
ADV2:  0.20240035349317395 0.40030973778938267 0.5168753830678767 3

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0055   0.0032   0.0120   8.4490   2.3854   3.1146
ADVA:  (18801,) (34696,) 0.5418780262854508
ADV1:  0.002654329911134335 0.000916524835667004 0.038359233603162846 0.20286463374763847 -1.7367526430349067
ADVB:  (21360,) (34696,) 0.6156329259857044
ADV2:  0.17806406847410203 0.30968550348897117 0.4023711477120528 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2912   0.1215   0.5332  46.1133  14.3235  11.3691
***** Episode 15925, Mean R = -36.5  Std R = 13.8  Min R = -83.8
PolicyLoss: 2.27
Policy_Entropy: 0.101
Policy_KL: 0.00641
Policy_SD: 0.643
Steps: 1.17e+04
TotalSteps: 5.3e+06
VF_0_ExplainedVarNew: 0.932
VF_0_ExplainedVarOld: 0.905
VF_0_Loss : 0.00877


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0036   0.0021   0.0077   8.4490   2.3854   3.1146
ADVA:  (18421,) (34751,) 0.53008546516647
ADV1:  0.0 -0.0018014323179929

cs_angles |  0.0086  0.0060 |  0.1012  0.1135 | -0.9696 -0.9996 |  0.9760  0.9993
optical_flow |  0.0001  0.0001 |  0.0224  0.0311 | -0.9822 -1.7249 |  1.2114  1.6352
v_err    | -0.0110 |  0.0624 | -0.5004 |  0.5345
landing_rewards |    7.32 |    4.43 |    0.00 |   10.00
landing_margin |    1.55 |   27.54 |   -0.07 |  485.68
tracking_rewards |  -35.16 |    8.66 |  -73.61 |  -20.91
steps    |     375 |      23 |     158 |     417
***** Episode 16173, Mean R = -32.1  Std R = 10.4  Min R = -61.2
PolicyLoss: 3.16
Policy_Entropy: 0.105
Policy_KL: 0.0059
Policy_SD: 0.624
Steps: 1.13e+04
TotalSteps: 5.4e+06
VF_0_ExplainedVarNew: 0.956
VF_0_ExplainedVarOld: 0.951
VF_0_Loss : 0.01


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0007   0.0029   8.4490   2.3854   3.1146
ADVA:  (19310,) (34591,) 0.5582376918851725
ADV1:  0.002151448171601105 0.001209935371373307 0.016988155595397746 0.14717

glideslope |    2.84 |   12.87 |    0.01 |  200.01
norm_af  |    1.69 |    0.84 |    0.11 |    3.26
norm_wf  |    0.04 |    0.01 |    0.00 |    0.07
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |    0.08   -0.00   -0.04 |    1.18    0.66    1.81 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.09   -0.07   -0.05 |    0.08    0.05    0.08
a_f      |   -0.02   -0.14 |    0.68    1.75 |   -1.51   -3.14 |    1.47    3.12
w_f      |   -0.00    0.00   -0.00 |    0.03    0.03    0.01 |   -0.06   -0.06   -0.04 |    0.07    0.05    0.03
w_rewards |   -0.46 |    0.31 |   -2.42 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.37 |    0.30 |    0.00 |    2.96
seeker_angles |    0.01   -0.00 |    0.11    0.10 |   -0.99   -1.00 |    1.

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0017   0.0066   8.4490   2.3854   3.1146
ADVA:  (18948,) (34700,) 0.5460518731988473
ADV1:  0.0019093225821338253 0.0008602351846601478 0.018065694553804492 0.09581833884119992 -0.2903941697161861
ADVB:  (20124,) (34700,) 0.5799423631123919
ADV2:  0.19697266458311782 0.40540077368099847 0.5159264225839548 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3923   0.1408   0.6851  46.1133  14.3235  11.3691
Update Cnt = 540    ET =   1182.1   Stats:  Mean, Std, Min, Max
r_f      |   -2.42    3.48   10.34 |  195.36  163.08  207.08 | -387.45 -381.51 -388.10 |  624.55  384.89  412.09
v_f      |   -0.01    0.00   -0.00 |    0.05    0.05    0.07 |   -0.13   -0.17   -0.76 |    0.12    0.16    0.13
r_i      |  -14.18    7.26   35.97 |  701.78  634.57  773.63 |-1305.48-1330.15-1347.21 | 1289.32 1374.97 1315.11
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.09    0.08    0.09
norm

***** Episode 17010, Mean R = -31.3  Std R = 11.6  Min R = -73.1
PolicyLoss: 3.04
Policy_Entropy: 0.114
Policy_KL: 0.00798
Policy_SD: 0.609
Steps: 1.16e+04
TotalSteps: 5.71e+06
VF_0_ExplainedVarNew: 0.953
VF_0_ExplainedVarOld: 0.95
VF_0_Loss : 0.00949


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0018   0.0072   8.4490   2.3854   3.1146
ADVA:  (18859,) (34837,) 0.5413497143841318
ADV1:  0.002541356032328147 0.0018530150994587357 0.016556397483558583 0.10643587833950918 -0.1573450262483157
ADVB:  (20488,) (34837,) 0.5881103424519907
ADV2:  0.23924506881331306 0.4534842862955123 0.5577983859633598 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5627   0.1899   1.0371  46.1133  14.3235  11.3691
***** Episode 17041, Mean R = -29.8  Std R = 9.8  Min R = -49.2
PolicyLoss: 3.31
Policy_Entropy: 0.114
Policy_KL: 0.0079
Policy_SD: 0.605
Steps: 1.16e+04
TotalSteps: 5.72e+06
VF_0_ExplainedVarNew: 0.96
VF_0_ExplainedVarOld: 0.956
VF_0_Loss : 0.00786


ValFun  Gradient

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7187   0.3671   1.4297  46.1133  14.3235  11.3691
***** Episode 17258, Mean R = -30.4  Std R = 10.5  Min R = -64.3
PolicyLoss: 2.62
Policy_Entropy: 0.116
Policy_KL: 0.00582
Policy_SD: 0.612
Steps: 1.17e+04
TotalSteps: 5.81e+06
VF_0_ExplainedVarNew: 0.955
VF_0_ExplainedVarOld: 0.952
VF_0_Loss : 0.00998


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0018   0.0074   8.4490   2.3854   3.1146
ADVA:  (20495,) (34901,) 0.5872324575227071
ADV1:  0.0 -0.0022589837935687203 0.016585888466469557 0.12663187367126283 -0.21508769091225535
ADVB:  (17409,) (34901,) 0.4988109223231426
ADV2:  0.0 0.27552366062807593 0.42741347000361185 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4347   0.2059   0.8442  46.1133  14.3235  11.3691
***** Episode 17289, Mean R = -31.3  Std R = 9.4  Min R = -57.2
PolicyLoss: 2.37
Policy_Entropy: 0.116
Policy_KL: 0.00646
Policy_SD: 0.616
Steps: 1.17e+04
TotalSteps: 5.82e+06
VF_0_ExplainedVar

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5945   0.2560   1.1629  46.1133  14.3235  11.3691
***** Episode 17506, Mean R = -28.5  Std R = 10.4  Min R = -62.8
PolicyLoss: 2.87
Policy_Entropy: 0.12
Policy_KL: 0.00641
Policy_SD: 0.593
Steps: 1.18e+04
TotalSteps: 5.9e+06
VF_0_ExplainedVarNew: 0.959
VF_0_ExplainedVarOld: 0.957
VF_0_Loss : 0.00842


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0019   8.4490   2.3854   3.1146
ADVA:  (20632,) (35262,) 0.5851057795927628
ADV1:  0.0017914878105669812 0.00021619693630460832 0.015120220594800582 0.11077726603095975 -0.11862601597013478
ADVB:  (20619,) (35262,) 0.5847371107708015
ADV2:  0.18484463353129477 0.37658924097562163 0.49077664676742083 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8592   0.4584   1.8439  46.1133  14.3235  11.3691
***** Episode 17537, Mean R = -30.6  Std R = 9.2  Min R = -60.3
PolicyLoss: 2.71
Policy_Entropy: 0.12
Policy_KL: 0.00762
Policy_SD: 0.592
Steps: 1.17e+04
TotalSt

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1939   0.0650   0.3910  46.1133  14.3235  11.3691
***** Episode 17754, Mean R = -28.4  Std R = 11.6  Min R = -63.9
PolicyLoss: 2.17
Policy_Entropy: 0.119
Policy_KL: 0.00581
Policy_SD: 0.609
Steps: 1.18e+04
TotalSteps: 5.99e+06
VF_0_ExplainedVarNew: 0.961
VF_0_ExplainedVarOld: 0.955
VF_0_Loss : 0.00902


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0016   0.0069   8.4490   2.3854   3.1146
ADVA:  (18307,) (35013,) 0.5228629366235398
ADV1:  0.0006861627688489729 -0.0006665648640427936 0.016773343383163903 0.12945100197812964 -0.16715222140588637
ADVB:  (20866,) (35013,) 0.5959500756861738
ADV2:  0.2111686825153982 0.398053509298896 0.5222917312189361 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.2507   1.2199   4.4018  46.1133  14.3235  11.3691
***** Episode 17785, Mean R = -31.5  Std R = 12.3  Min R = -80.6
Polic

theta_cv |    0.35 |    0.30 |    0.00 |    2.76
seeker_angles |    0.01   -0.00 |    0.10    0.10 |   -1.00   -0.99 |    0.98    0.98
cs_angles |  0.0066 -0.0006 |  0.0990  0.1048 | -0.9971 -0.9947 |  0.9827  0.9813
optical_flow | -0.0000 -0.0000 |  0.0255  0.0275 | -1.0526 -0.9628 |  0.9399  1.2800
v_err    | -0.0110 |  0.0606 | -0.4959 |  0.1524
landing_rewards |    8.39 |    3.68 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.09 |    0.06
tracking_rewards |  -31.85 |    8.14 |  -76.94 |  -19.04
steps    |     377 |      21 |     329 |     421
***** Episode 18033, Mean R = -26.0  Std R = 8.1  Min R = -46.1
PolicyLoss: 2.57
Policy_Entropy: 0.124
Policy_KL: 0.00541
Policy_SD: 0.604
Steps: 1.17e+04
TotalSteps: 6.1e+06
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.00703


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0014   8.4490   2.3854   3.1146
ADVA:  (18560,) (35109,) 0.5286393802158991
ADV1:  0.0013444992858230084 0.0

attitude |    0.00   -0.03   -0.07 |    1.13    0.64    1.78 |   -3.14   -1.54   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.10   -0.08   -0.05 |    0.08    0.05    0.07
a_f      |   -0.02   -0.05 |    0.67    1.79 |   -1.37   -3.14 |    1.50    3.14
w_f      |   -0.00    0.00   -0.00 |    0.02    0.02    0.01 |   -0.05   -0.05   -0.04 |    0.07    0.04    0.03
w_rewards |   -0.53 |    0.40 |   -3.10 |   -0.01
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.34 |    0.30 |    0.00 |    2.99
seeker_angles |    0.01   -0.00 |    0.10    0.10 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0077 -0.0007 |  0.1012  0.0994 | -0.9999 -0.9996 |  0.9961  0.9999
optical_flow | -0.0000  0.0002 |  0.0259  0.0278 | -1.2739 -1.7103 |  1.1594  1.6317
v_err    | -0.0112 |  0.0609 | -0.4551 |  0.1286
landing_rewards |    7.90 |    4.07 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5908   0.2354   1.1276  46.1133  14.3235  11.3691
Update Cnt = 600    ET =   1175.2   Stats:  Mean, Std, Min, Max
r_f      |   -6.15   -1.08   14.72 |  180.64  172.94  198.35 | -390.33 -355.09 -379.03 |  386.27  376.58  396.58
v_f      |    0.00    0.00    0.00 |    0.05    0.05    0.05 |   -0.12   -0.12   -0.14 |    0.13    0.14    0.13
r_i      |   21.92  -23.75   45.39 |  682.80  653.40  761.70 |-1232.39-1306.94-1278.21 | 1327.49 1230.33 1325.42
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.48 |    0.23 |    0.04 |    1.41
norm_vf  |    0.08 |    0.02 |    0.03 |    0.16
gs_f     |    1.41 |    3.02 |    0.01 |   43.17
thrust   |    0.00   -0.00   -0.00 |    0.71    0.73    0.72 |   -3.42   -3.46   -3.41 |    3.45    3.45    3.44
norm_thrust |    1.00 |    0.74 |    0.00 |    3.46
fuel     |    1.82 |    0.23 |    1.43 |    3.02
rewards  |  -27.85 |

ADVA:  (20042,) (35649,) 0.5622037083789166
ADV1:  0.0005297009584907218 -0.0010999056546071818 0.01710678034622332 0.14562188786712138 -0.2304947729030462
ADVB:  (19483,) (35649,) 0.5465230441246599
ADV2:  0.07928642532252998 0.3012230382192179 0.46305672419370125 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7886   0.4112   1.6499  46.1133  14.3235  11.3691
***** Episode 18901, Mean R = -27.1  Std R = 8.8  Min R = -47.3
PolicyLoss: 2.25
Policy_Entropy: 0.129
Policy_KL: 0.00825
Policy_SD: 0.589
Steps: 1.19e+04
TotalSteps: 6.43e+06
VF_0_ExplainedVarNew: 0.962
VF_0_ExplainedVarOld: 0.958
VF_0_Loss : 0.007


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0019   8.4490   2.3854   3.1146
ADVA:  (18264,) (35481,) 0.514754375581297
ADV1:  0.0012485125847564692 0.0002708653994086906 0.014985865426194461 0.14562188786712138 -0.16357072113191562
ADVB:  (21992,) (35481,) 0.6198246949071334
ADV2:  0.23883425231022007 0.4129216553084937 0.5280442724143848 3.0

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0016   0.0066   8.4490   2.3854   3.1146
ADVA:  (19751,) (35282,) 0.5598038659939912
ADV1:  0.0014151658422690838 8.154801333053274e-05 0.014407040557734474 0.10155802518736673 -0.12172577941767737
ADVB:  (21290,) (35282,) 0.6034238421858171
ADV2:  0.21259085141566605 0.4146118451133728 0.5283026960774945 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2880   0.1034   0.4980  46.1133  14.3235  11.3691
***** Episode 19149, Mean R = -24.6  Std R = 5.6  Min R = -37.2
PolicyLoss: 2.79
Policy_Entropy: 0.132
Policy_KL: 0.00824
Policy_SD: 0.585
Steps: 1.2e+04
TotalSteps: 6.52e+06
VF_0_ExplainedVarNew: 0.975
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.00674


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0005   0.0022   8.4490   2.3854   3.1146
ADVA:  (19330,) (35290,) 0.5477472371776707
ADV1:  0.002254178030882138 0.0013350816222167346 0.013564151525741447 0.10155802518736673 -0.1421605722240047
ADVB:  (2

***** Episode 19366, Mean R = -26.8  Std R = 7.7  Min R = -50.1
PolicyLoss: 2.22
Policy_Entropy: 0.133
Policy_KL: 0.00572
Policy_SD: 0.589
Steps: 1.18e+04
TotalSteps: 6.6e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.00871


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0033   8.4490   2.3854   3.1146
ADVA:  (19263,) (35306,) 0.5456013142242112
ADV1:  0.0012078661987884615 -0.00011730428632272169 0.014605261879866133 0.13961967111176282 -0.1546545774717858
ADVB:  (22010,) (35306,) 0.623406786381918
ADV2:  0.22001729425367597 0.3853464134402081 0.5055789962243148 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3034   0.1046   0.5613  46.1133  14.3235  11.3691
***** Episode 19397, Mean R = -26.6  Std R = 10.1  Min R = -52.0
PolicyLoss: 2.5
Policy_Entropy: 0.133
Policy_KL: 0.00538
Policy_SD: 0.589
Steps: 1.18e+04
TotalSteps: 6.61e+06
VF_0_ExplainedVarNew: 0.971
VF_0_ExplainedVarOld: 0.968
VF_0_Loss : 0.00677


ValFun  Gradi

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6502   0.2739   1.2536  46.1133  14.3235  11.3691
***** Episode 19614, Mean R = -23.8  Std R = 5.6  Min R = -39.4
PolicyLoss: 2.73
Policy_Entropy: 0.134
Policy_KL: 0.0106
Policy_SD: 0.585
Steps: 1.16e+04
TotalSteps: 6.69e+06
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.00853


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0025   8.4490   2.3854   3.1146
ADVA:  (19121,) (35027,) 0.5458931681274446
ADV1:  0.0028123409605071356 0.0018232743820480802 0.014154341412231458 0.10753417280818645 -0.18428169804430372
ADVB:  (23125,) (35027,) 0.6602049847260685
ADV2:  0.304482536930827 0.4698467726005277 0.5684456875507016 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8707   0.2877   1.3719  46.1133  14.3235  11.3691
***** Episode 19645, Mean R = -27.3  Std R = 9.8  Min R = -58.2
PolicyLoss

attitude |   -0.15   -0.03   -0.07 |    1.12    0.62    1.78 |   -3.14   -1.55   -3.14 |    3.14    1.54    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.10   -0.08   -0.05 |    0.08    0.05    0.08
a_f      |   -0.04   -0.03 |    0.63    1.77 |   -1.41   -3.11 |    1.52    3.13
w_f      |    0.00   -0.00   -0.00 |    0.02    0.02    0.01 |   -0.10   -0.03   -0.04 |    0.05    0.04    0.03
w_rewards |   -0.50 |    0.38 |   -2.73 |    0.00
w_penalty |   -0.32 |    5.67 | -100.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.32 |    0.31 |    0.00 |    3.04
seeker_angles |    0.01    0.00 |    0.10    0.09 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0095  0.0030 |  0.0977  0.0947 | -0.9963 -0.9989 |  0.9985  0.9980
optical_flow |  0.0001 -0.0000 |  0.0245  0.0264 | -0.9330 -1.3736 |  1.1267  1.3248
v_err    | -0.0111 |  0.0597 | -0.4565 |  0.2077
landing_rewards |    8.94 |    3.08 |    0.00 |   10.00
landing_margin |    2

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9216   0.4208   1.8300  46.1133  14.3235  11.3691
Update Cnt = 650    ET =   1360.8   Stats:  Mean, Std, Min, Max
r_f      |   23.97   -1.88   -5.73 |  185.97  178.63  192.18 | -396.80 -380.07 -390.10 |  394.00  392.04  374.65
v_f      |   -0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.18   -0.11   -0.10 |    0.10    0.11    0.11
r_i      |   66.81    3.85  -22.88 |  690.36  676.04  749.30 |-1326.04-1289.84-1285.03 | 1359.93 1306.28 1300.21
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.40 |    0.20 |    0.07 |    1.36
norm_vf  |    0.08 |    0.02 |    0.03 |    0.18
gs_f     |    1.27 |    2.56 |    0.01 |   34.93
thrust   |    0.00   -0.00    0.01 |    0.70    0.72    0.71 |   -3.45   -3.45   -3.29 |    3.39    3.46    3.46
norm_thrust |    0.98 |    0.75 |    0.00 |    3.46
fuel     |    1.78 |    0.21 |    1.37 |    2.79
rewards  |  -25.54 |

ADVA:  (20474,) (35092,) 0.5834378205858886
ADV1:  0.0018169244411153987 0.00028043767081284085 0.015198651795864519 0.3058400352828379 -0.16857559077848638
ADVB:  (21248,) (35092,) 0.6054941297161747
ADV2:  0.17570423727983073 0.3736550745657402 0.5070278781775351 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7341   0.6873   3.3929  46.1133  14.3235  11.3691
***** Episode 20451, Mean R = -24.2  Std R = 7.2  Min R = -43.5
PolicyLoss: 2.42
Policy_Entropy: 0.14
Policy_KL: 0.00813
Policy_SD: 0.575
Steps: 1.17e+04
TotalSteps: 7.01e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.00749


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0013   0.0048   8.4490   2.3854   3.1146
ADVA:  (20547,) (35138,) 0.5847515510273777
ADV1:  0.0021715434909850805 0.0009047480365600244 0.01431605782998418 0.3058400352828379 -0.16857559077848638
ADVB:  (22331,) (35138,) 0.6355227958335705
ADV2:  0.22698040289812632 0.40767451519412845 0.5209605463114297 3.

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0008   0.0031   8.4490   2.3854   3.1146
ADVA:  (17161,) (35036,) 0.4898104806484759
ADV1:  0.0016685301184219646 0.0010169656432216405 0.012124918596667105 0.07376451227079833 -0.21157551953357956
ADVB:  (23718,) (35036,) 0.6769608402785706
ADV2:  0.3345940137899448 0.4784391052046794 0.5639243091819707 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.1568   3.5790  13.6169  46.1133  14.3235  11.3691
***** Episode 20699, Mean R = -22.6  Std R = 6.8  Min R = -38.2
PolicyLoss: 2.77
Policy_Entropy: 0.142
Policy_KL: 0.00737
Policy_SD: 0.574
Steps: 1.16e+04
TotalSteps: 7.11e+06
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.0071


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0061   8.4490   2.3854   3.1146
ADVA:  (19441,) (35027,) 0.555028977645816
ADV1:  0.0003726049496149599 

***** Episode 20916, Mean R = -23.0  Std R = 5.6  Min R = -34.6
PolicyLoss: 2.33
Policy_Entropy: 0.143
Policy_KL: 0.00775
Policy_SD: 0.57
Steps: 1.17e+04
TotalSteps: 7.19e+06
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00735


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0012   8.4490   2.3854   3.1146
ADVA:  (18581,) (35258,) 0.5270009643201543
ADV1:  0.0011953765060387644 0.00043544063535730045 0.011601123180246474 0.08386240092224378 -0.1374910643100391
ADVB:  (22666,) (35258,) 0.642861194622497
ADV2:  0.2477128678063049 0.4199538098256784 0.5345847469717586 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3652   0.1063   0.5442  46.1133  14.3235  11.3691
***** Episode 20947, Mean R = -22.5  Std R = 5.5  Min R = -39.2
PolicyLoss: 2.53
Policy_Entropy: 0.144
Policy_KL: 0.00742
Policy_SD: 0.563
Steps: 1.17e+04
TotalSteps: 7.2e+06
VF

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.2403   1.2609   4.7077  46.1133  14.3235  11.3691
***** Episode 21164, Mean R = -24.7  Std R = 8.0  Min R = -41.1
PolicyLoss: 2.32
Policy_Entropy: 0.147
Policy_KL: 0.00557
Policy_SD: 0.567
Steps: 1.17e+04
TotalSteps: 7.28e+06
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.972
VF_0_Loss : 0.00763


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0031   8.4490   2.3854   3.1146
ADVA:  (21528,) (35017,) 0.6147871034069167
ADV1:  0.0017675669468806415 0.00018078605768306335 0.013075436447454045 0.20012977944157423 -0.14333873805407565
ADVB:  (20870,) (35017,) 0.595996230402376
ADV2:  0.15944059231256333 0.3436292678401065 0.4619478502817713 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8690   0.4660   1.8210  46.1133  14.3235  11.3691
***** Episode 21195, Mean R = -24.4  Std R = 7.8  Min R = -48.0
PolicyLoss: 2.21
Policy_Entropy: 0.148
Policy_KL: 0.0049
Policy_SD: 0.568
Steps: 1.16e+04
TotalStep

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.09   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |    0.04    0.06 |    0.72    1.90 |   -1.50   -3.14 |    1.51    3.12
w_f      |    0.00    0.00   -0.00 |    0.02    0.02    0.01 |   -0.04   -0.04   -0.04 |    0.06    0.04    0.03
w_rewards |   -0.40 |    0.28 |   -2.64 |   -0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.30 |    0.30 |    0.00 |    3.02
seeker_angles |    0.01    0.00 |    0.09    0.09 |   -1.00   -0.99 |    1.00    1.00
cs_angles |  0.0062  0.0034 |  0.0923  0.0875 | -0.9979 -0.9861 |  0.9987  0.9969
optical_flow | -0.0001  0.0001 |  0.0260  0.0260 | -1.1376 -1.1868 |  1.1374  1.1987
v_err    | -0.0110 |  0.0599 | -0.4538 |  0.1350
landing_rewards |    8.68 |    3.39 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.08 |    0.04
tracking_rewards |  -26.94 |    5.46 |  -47.55 |  -16.89
steps    |     376 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4958   0.1971   0.9743  46.1133  14.3235  11.3691
Update Cnt = 700    ET =   1371.2   Stats:  Mean, Std, Min, Max
r_f      |    2.18    6.56   20.25 |  192.97  177.35  193.49 | -392.77 -388.24 -393.03 |  389.38  389.46  389.08
v_f      |   -0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.12   -0.16   -0.13 |    0.11    0.13    0.11
r_i      |    7.76    1.65   95.56 |  713.26  652.89  739.86 |-1305.22-1328.31-1267.05 | 1248.37 1326.33 1292.78
v_i      |   -0.00    0.00   -0.01 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.37 |    0.17 |    0.05 |    1.05
norm_vf  |    0.08 |    0.02 |    0.03 |    0.17
gs_f     |    1.06 |    1.13 |    0.01 |    6.90
thrust   |   -0.00    0.00   -0.00 |    0.69    0.70    0.71 |   -3.44   -3.46   -3.43 |    3.41    3.45    3.44
norm_thrust |    0.95 |    0.75 |    0.00 |    3.46
fuel     |    1.68 |    0.24 |    1.21 |    2.88
rewards  |  -23.33 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3390   0.1280   0.6204  46.1133  14.3235  11.3691
***** Episode 22001, Mean R = -24.3  Std R = 7.3  Min R = -40.6
PolicyLoss: 2.15
Policy_Entropy: 0.151
Policy_KL: 0.00689
Policy_SD: 0.575
Steps: 1.18e+04
TotalSteps: 7.6e+06
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.00756


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0040   0.0024   0.0089   8.4490   2.3854   3.1146
ADVA:  (21665,) (35066,) 0.6178349398277534
ADV1:  0.0018816436910145604 0.0004997513197650907 0.012478323243724122 0.07955705119390472 -0.15082896605356122
ADVB:  (21457,) (35066,) 0.6119032681229681
ADV2:  0.20183816093204782 0.36926655170208295 0.4814110151890603 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6579   0.2230   1.0543  46.1133  14.3235  11.3691
***** Episode 22032, Mean R = -21.9  Std R = 6.6  Min R = -33.3
Policy

ADVA:  (18857,) (35415,) 0.5324579980234364
ADV1:  0.0020415923877143763 0.0013188211405900949 0.01214470385219038 0.13235923494025204 -0.13765425563185762
ADVB:  (22574,) (35415,) 0.6374135253423691
ADV2:  0.25511763295049766 0.4381579094066217 0.5521796854968657 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5933   0.2051   1.0889  46.1133  14.3235  11.3691
***** Episode 22249, Mean R = -23.0  Std R = 6.0  Min R = -42.6
PolicyLoss: 2.62
Policy_Entropy: 0.151
Policy_KL: 0.00832
Policy_SD: 0.566
Steps: 1.18e+04
TotalSteps: 7.69e+06
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00629


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0039   0.0024   0.0094   8.4490   2.3854   3.1146
ADVA:  (20279,) (35737,) 0.5674511010997005
ADV1:  0.00033349663595316336 -0.0005643023105778238 0.012601808662700125 0.09438885890889626 -0.1592006637783211
ADVB:  (20077,) 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0050   8.4490   2.3854   3.1146
ADVA:  (19563,) (35046,) 0.5582092107515836
ADV1:  0.0008385797479667396 -0.0003007706044698806 0.011260281529114043 0.0760656635626123 -0.11841375028080092
ADVB:  (20227,) (35046,) 0.5771557381726873
ADV2:  0.13680271972290362 0.34305091771184754 0.4828419656432068 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4681   0.1607   0.7562  46.1133  14.3235  11.3691
***** Episode 22497, Mean R = -24.7  Std R = 8.0  Min R = -42.6
PolicyLoss: 2.25
Policy_Entropy: 0.153
Policy_KL: 0.00663
Policy_SD: 0.559
Steps: 1.16e+04
TotalSteps: 7.79e+06
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.00687


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0012   0.0049   8.4490   2.3854   3.1146
ADVA:  (17743,) (34952,) 0.5076390478370336
ADV1:  0.0012668858427461842 0.00033694000398825985 0.012500518727313633 0.11212527048208054 -0.31864422196157816
ADVB

***** Episode 22714, Mean R = -23.7  Std R = 9.8  Min R = -59.6
PolicyLoss: 2.09
Policy_Entropy: 0.158
Policy_KL: 0.00814
Policy_SD: 0.547
Steps: 1.18e+04
TotalSteps: 7.87e+06
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00651


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0020   8.4490   2.3854   3.1146
ADVA:  (18568,) (35241,) 0.5268862972106353
ADV1:  0.0010545293878519608 0.0006478974385220172 0.009669922807075822 0.07149928482413032 -0.1696042691718207
ADVB:  (21708,) (35241,) 0.6159870605260918
ADV2:  0.20444203949400372 0.4049539033298399 0.5446198538967792 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.2775   0.0939   0.5003  46.1133  14.3235  11.3691
***** Episode 22745, Mean R = -20.7  Std R = 4.9  Min R = -30.4
PolicyLoss: 2.44
Policy_Entropy: 0.157
Policy_KL: 0.00691
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 7.88e+06
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.0051


ValFun  Gradie

cs_angles |  0.0059 -0.0022 |  0.0893  0.0892 | -0.9861 -0.9962 |  0.9816  0.9805
optical_flow | -0.0001  0.0003 |  0.0261  0.0219 | -0.8825 -1.0752 |  1.2624  1.1222
v_err    | -0.0107 |  0.0598 | -0.4569 |  0.1522
landing_rewards |    9.03 |    2.96 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.08 |    0.04
tracking_rewards |  -26.33 |    5.95 |  -62.02 |  -16.73
steps    |     377 |      22 |     330 |     420
***** Episode 22993, Mean R = -21.3  Std R = 6.3  Min R = -38.1
PolicyLoss: 2.43
Policy_Entropy: 0.156
Policy_KL: 0.0081
Policy_SD: 0.558
Steps: 1.17e+04
TotalSteps: 7.97e+06
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00576


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0057   0.0028   0.0115   8.4490   2.3854   3.1146
ADVA:  (19921,) (35055,) 0.5682784196263015
ADV1:  0.0020805551633254604 -0.0001559013802895856 0.02557079127237837 0.09122010803291147 -1.2418989791457515
ADVB:  (22060,) (35055,) 0.6292968192839823
ADV2:  0.19

thrust   |   -0.00   -0.00    0.00 |    0.68    0.69    0.68 |   -3.42   -3.46   -3.46 |    3.45    3.45    3.46
norm_thrust |    0.92 |    0.74 |    0.00 |    3.46
fuel     |    1.64 |    0.18 |    1.23 |    2.16
rewards  |  -22.00 |    8.19 | -112.12 |   -9.59
fuel_rewards |   -4.70 |    0.50 |   -6.20 |   -3.51
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    3.06 |   13.39 |    0.01 |  241.98
norm_af  |    1.73 |    0.90 |    0.09 |    3.32
norm_wf  |    0.03 |    0.01 |    0.00 |    0.05
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |    0.02   -0.02    0.12 |    1.19    0.66    1.83 |   -3.14   -1.56   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.08   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |   -0.02    0.20 |    0.66

ADVA:  (17191,) (35134,) 0.4892981157852792
ADV1:  0.0009832551591490556 0.0007234057186474305 0.009640485952574943 0.11072236349999365 -0.08471822272375207
ADVB:  (22856,) (35134,) 0.6505379404565378
ADV2:  0.26755061093759597 0.455075879505574 0.5771446364127154 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0037   0.4836   1.9867  46.1133  14.3235  11.3691
***** Episode 23582, Mean R = -20.7  Std R = 6.9  Min R = -39.8
PolicyLoss: 2.59
Policy_Entropy: 0.159
Policy_KL: 0.00582
Policy_SD: 0.566
Steps: 1.16e+04
TotalSteps: 8.19e+06
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00611


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0014   8.4490   2.3854   3.1146
ADVA:  (18716,) (34856,) 0.5369520312141381
ADV1:  0.0002956234880432118 -2.208221734530282e-05 0.009228317865366983 0.11072236349999365 -0.06196064665419694
ADVB:  (19638,) 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0002   0.0008   8.4490   2.3854   3.1146
ADVA:  (19486,) (34792,) 0.5600712807541963
ADV1:  0.00040320025983608954 0.00012034936350717119 0.009143884385217552 0.12548807335947443 -0.11092775233013658
ADVB:  (19472,) (34792,) 0.559668889399862
ADV2:  0.09672123363131362 0.3451848953161883 0.5093754416863359 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4197   0.1395   0.6740  46.1133  14.3235  11.3691
***** Episode 23830, Mean R = -21.1  Std R = 5.5  Min R = -34.8
PolicyLoss: 2.26
Policy_Entropy: 0.16
Policy_KL: 0.008
Policy_SD: 0.562
Steps: 1.16e+04
TotalSteps: 8.29e+06
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00577


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0020   8.4490   2.3854   3.1146
ADVA:  (19548,) (35065,) 0.5574789676315415
ADV1:  0.0 -0.000675292184319

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0052   0.0033   0.0117   8.4490   2.3854   3.1146
ADVA:  (19899,) (35200,) 0.5653125
ADV1:  0.0004142504645820288 -0.0007403224305618391 0.010988061296664147 0.0717282991176833 -0.09548149130386603
ADVB:  (19279,) (35200,) 0.5476988636363637
ADV2:  0.0837073164693545 0.3095658411192582 0.46141062030294433 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4441   0.1613   0.8308  46.1133  14.3235  11.3691
***** Episode 24078, Mean R = -21.8  Std R = 5.9  Min R = -31.4
PolicyLoss: 2.07
Policy_Entropy: 0.161
Policy_KL: 0.01
Policy_SD: 0.558
Steps: 1.18e+04
TotalSteps: 8.38e+06
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.00635


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0046   0.0029   0.0112   8.4490   2.3854   3.1146
ADVA:  (17236,) (35209,) 0.4895339259848334
ADV1:  0.0018369011282778937 0.000903422713256588 0.010674436138578785 0.06736060053789328 -0.09548149130386603
ADVB:  (23253,) (35

***** Episode 24295, Mean R = -22.1  Std R = 7.3  Min R = -43.1
PolicyLoss: 2.1
Policy_Entropy: 0.161
Policy_KL: 0.00917
Policy_SD: 0.558
Steps: 1.2e+04
TotalSteps: 8.46e+06
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.00545


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0028   8.4490   2.3854   3.1146
ADVA:  (19112,) (35381,) 0.5401769311212232
ADV1:  0.0017684362160245194 0.0011149681797804378 0.010989577926227645 0.14752120253022383 -0.30840872324808344
ADVB:  (23402,) (35381,) 0.6614284502981826
ADV2:  0.2920301989620954 0.45422432766071713 0.5384874166992079 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4598   0.1861   0.8925  46.1133  14.3235  11.3691
***** Episode 24326, Mean R = -20.6  Std R = 5.6  Min R = -39.4
PolicyLoss: 2.52
Policy_Entropy: 0.162
Policy_KL: 0.00661
Policy_SD: 0.558
Steps: 1.18e+04
TotalSteps: 8.48e+06
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00692


ValFun  Gradien

***** Episode 24543, Mean R = -22.8  Std R = 7.2  Min R = -43.8
PolicyLoss: 2.97
Policy_Entropy: 0.162
Policy_KL: 0.00629
Policy_SD: 0.571
Steps: 1.2e+04
TotalSteps: 8.56e+06
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00672


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0031   8.4490   2.3854   3.1146
ADVA:  (20750,) (35420,) 0.5858272162619989
ADV1:  0.0 -0.0012731641362237328 0.009198849021648942 0.10312284810986884 -0.12406904057668386
ADVB:  (15581,) (35420,) 0.4398927159796725
ADV2:  0.0 0.25169514037960916 0.4593992580037135 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7487   0.2129   1.1455  46.1133  14.3235  11.3691
***** Episode 24574, Mean R = -20.9  Std R = 7.0  Min R = -40.2
PolicyLoss: 2.08
Policy_Entropy: 0.162
Policy_KL: 0.00939
Policy_SD: 0.569
Steps: 1.16e+04
TotalSteps: 8.57e+06
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.00791


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd

attitude |   -0.06   -0.05   -0.00 |    1.25    0.65    1.88 |   -3.14   -1.56   -3.14 |    3.14    1.55    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.09   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |   -0.07   -0.03 |    0.64    1.88 |   -1.50   -3.14 |    1.40    3.13
w_f      |    0.00   -0.00   -0.01 |    0.02    0.01    0.01 |   -0.04   -0.03   -0.05 |    0.06    0.03    0.03
w_rewards |   -0.35 |    0.24 |   -2.19 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.28 |    0.29 |    0.00 |    2.83
seeker_angles |    0.00   -0.00 |    0.09    0.08 |   -0.97   -0.98 |    1.00    1.00
cs_angles |  0.0039 -0.0003 |  0.0868  0.0836 | -0.9705 -0.9826 |  0.9972  0.9955
optical_flow |  0.0004  0.0003 |  0.0260  0.0242 | -1.0040 -1.0128 |  1.1141  1.0306
v_err    | -0.0100 |  0.0588 | -0.4635 |  0.1867
landing_rewards |    9.00 |    3.00 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5847   0.2255   1.0860  46.1133  14.3235  11.3691
Update Cnt = 810    ET =   1569.9   Stats:  Mean, Std, Min, Max
r_f      |    2.12   -4.23    5.26 |  177.51  173.30  199.88 | -392.57 -390.76 -396.11 |  390.28  382.75  392.74
v_f      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.11 |    0.10    0.10    0.11
r_i      |   36.13  -34.75   40.19 |  683.16  629.30  780.35 |-1371.39-1288.41-1308.79 | 1360.84 1238.14 1301.31
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.27 |    0.12 |    0.03 |    0.70
norm_vf  |    0.08 |    0.02 |    0.03 |    0.13
gs_f     |    1.48 |    2.49 |    0.01 |   25.86
thrust   |   -0.00   -0.00   -0.01 |    0.67    0.68    0.68 |   -3.37   -3.40   -3.39 |    3.41    3.27    3.42
norm_thrust |    0.91 |    0.73 |    0.00 |    3.46
fuel     |    1.58 |    0.18 |    1.13 |    2.15
rewards  |  -20.51 |

ADVA:  (19070,) (35230,) 0.5413000283848992
ADV1:  0.0010989854867298199 0.0007032614236879096 0.009320925637539777 0.07517257344962208 -0.11390154605172198
ADVB:  (22236,) (35230,) 0.6311666193585013
ADV2:  0.2436926755937 0.4302667968993138 0.5374887693431228 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.2426   3.6944  13.8843  46.1133  14.3235  11.3691
***** Episode 25411, Mean R = -20.9  Std R = 7.0  Min R = -35.0
PolicyLoss: 2.43
Policy_Entropy: 0.168
Policy_KL: 0.0109
Policy_SD: 0.546
Steps: 1.18e+04
TotalSteps: 8.89e+06
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.0068


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0012   0.0043   8.4490   2.3854   3.1146
ADVA:  (19798,) (35191,) 0.5625870250916427
ADV1:  0.0009039878764526875 8.149559169065312e-05 0.01013491346474569 0.07517257344962208 -0.11860638698568149
ADVB:  (21659,) (35191,) 0.6154698644539797
ADV2:  0.200294501780101 0.39327431099407995 0.5014023422642112 3.0 0.0


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0019   8.4490   2.3854   3.1146
ADVA:  (19803,) (35151,) 0.5633694631731672
ADV1:  0.0 -0.0007814320756688795 0.008961322128711076 0.11186184083514744 -0.09969056911255869
ADVB:  (16850,) (35151,) 0.4793604733862479
ADV2:  0.0 0.3084188070365089 0.5129745271236902 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1290   0.4845   2.0610  46.1133  14.3235  11.3691
***** Episode 25659, Mean R = -20.6  Std R = 6.3  Min R = -38.9
PolicyLoss: 2.31
Policy_Entropy: 0.169
Policy_KL: 0.00787
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 8.98e+06
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.0074


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0013   8.4490   2.3854   3.1146
ADVA:  (17838,) (34982,) 0.5099193871133726
ADV1:  0.0003993050814138834 0.0004998322200546613 0.008098011060167156 0.11186184083514744 -0.09969056911255869
ADVB:  (20781,) (34982,) 0.5940483677319

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0005   0.0024   8.4490   2.3854   3.1146
ADVA:  (17398,) (35069,) 0.49610767344378226
ADV1:  0.0006136037563003148 0.00023194178429878875 0.010200730658651548 0.07933541370609021 -0.1463666835545704
ADVB:  (21860,) (35069,) 0.62334255325216
ADV2:  0.22049464550122166 0.4161931376527103 0.5457913565933042 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5991   0.2596   1.2320  46.1133  14.3235  11.3691
***** Episode 25907, Mean R = -18.5  Std R = 5.4  Min R = -36.2
PolicyLoss: 2.39
Policy_Entropy: 0.169
Policy_KL: 0.00599
Policy_SD: 0.549
Steps: 1.17e+04
TotalSteps: 9.07e+06
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00563


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0043   0.0023   0.0079   8.4490   2.3854   3.1146
ADVA:  (19248,) (35223,) 0.5464611191550975
ADV1:  0.0 -0.000873411872451911 0.010050321750509357 0.07933541370609021 -0.10386040773518497
ADVB:  (18532,) (35223,) 

***** Episode 26124, Mean R = -20.5  Std R = 6.6  Min R = -40.5
PolicyLoss: 2.18
Policy_Entropy: 0.17
Policy_KL: 0.0059
Policy_SD: 0.544
Steps: 1.18e+04
TotalSteps: 9.15e+06
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00658


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0006   8.4490   2.3854   3.1146
ADVA:  (21237,) (35013,) 0.606546140005141
ADV1:  0.0019675434471084236 0.0003142092101970817 0.010870403598871193 0.07801460659695714 -0.07935141193981993
ADVB:  (21795,) (35013,) 0.6224830777139919
ADV2:  0.24324454494499684 0.39203785117141937 0.46653581036093894 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5348   0.1815   0.9004  46.1133  14.3235  11.3691
***** Episode 26155, Mean R = -23.1  Std R = 6.9  Min R = -39.3
PolicyLoss: 2.24
Policy_Entropy: 0.171
Policy_KL: 0.00596
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 9.17e+06
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00737


ValFun  Gradie

cs_angles |  0.0064  0.0046 |  0.0866  0.0928 | -0.9970 -0.9996 |  0.9919  0.9924
optical_flow |  0.0001  0.0002 |  0.0260  0.0241 | -1.4224 -1.3062 |  1.1380  1.0968
v_err    | -0.0109 |  0.0601 | -0.4558 |  0.1178
landing_rewards |    8.84 |    3.20 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.08 |    0.03
tracking_rewards |  -25.32 |    5.86 |  -59.09 |  -14.61
steps    |     378 |      20 |     335 |     416
***** Episode 26403, Mean R = -20.4  Std R = 6.1  Min R = -43.5
PolicyLoss: 2.72
Policy_Entropy: 0.171
Policy_KL: 0.00826
Policy_SD: 0.543
Steps: 1.16e+04
TotalSteps: 9.26e+06
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00719


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0026   8.4490   2.3854   3.1146
ADVA:  (19832,) (34956,) 0.5673418011214098
ADV1:  0.0027983045465888555 0.0016185972422909957 0.009934048135977641 0.06461008737484619 -0.11936271306148749
ADVB:  (23524,) (34956,) 0.6729602929396956
ADV2:  0.

attitude |    0.09    0.01    0.10 |    1.16    0.66    1.85 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.08   -0.07   -0.05 |    0.06    0.05    0.07
a_f      |    0.00    0.04 |    0.67    1.85 |   -1.43   -3.14 |    1.47    3.12
w_f      |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.05 |    0.05    0.03    0.04
w_rewards |   -0.30 |    0.20 |   -1.77 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.28 |    0.29 |    0.00 |    2.57
seeker_angles |    0.00    0.00 |    0.09    0.09 |   -0.99   -0.99 |    0.98    1.00
cs_angles |  0.0035  0.0021 |  0.0885  0.0866 | -0.9875 -0.9934 |  0.9764  0.9968
optical_flow |  0.0002  0.0004 |  0.0258  0.0229 | -1.2222 -1.1071 |  1.0103  0.9131
v_err    | -0.0113 |  0.0601 | -0.4529 |  0.1612
landing_rewards |    8.90 |    3.12 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4460   0.1303   0.8205  46.1133  16.1167  11.3691
Update Cnt = 870    ET =   1358.1   Stats:  Mean, Std, Min, Max
r_f      |    2.42   -1.50   14.89 |  185.60  179.88  198.79 | -372.91 -389.85 -385.64 |  395.85  384.34  380.51
v_f      |    0.00    0.00   -0.00 |    0.04    0.05    0.05 |   -0.10   -0.10   -0.12 |    0.11    0.10    0.10
r_i      |  -17.84    7.29   44.36 |  668.69  687.73  756.61 |-1282.45-1299.78-1348.09 | 1246.16 1272.61 1322.37
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.25 |    0.11 |    0.04 |    0.66
norm_vf  |    0.08 |    0.02 |    0.02 |    0.12
gs_f     |    1.25 |    1.58 |    0.01 |   10.55
thrust   |   -0.00    0.00   -0.00 |    0.67    0.68    0.67 |   -3.44   -3.34   -3.43 |    3.41    3.40    3.41
norm_thrust |    0.90 |    0.74 |    0.00 |    3.46
fuel     |    1.56 |    0.19 |    1.08 |    2.43
rewards  |  -20.67 |

ADVA:  (21808,) (34991,) 0.623245977537081
ADV1:  0.001980536996628755 0.00010103219769236235 0.012302353526953939 0.09134691434416278 -0.1314641013628028
ADVB:  (22618,) (34991,) 0.6463947872310023
ADV2:  0.23164769653878814 0.35305684066224274 0.42248269449867104 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4287   0.1482   0.7414  46.1133  16.1167  11.3691
***** Episode 27271, Mean R = -22.7  Std R = 11.8  Min R = -73.1
PolicyLoss: 1.93
Policy_Entropy: 0.173
Policy_KL: 0.00458
Policy_SD: 0.553
Steps: 1.16e+04
TotalSteps: 9.59e+06
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.00779


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0036   0.0022   0.0080   8.4490   2.3854   3.1146
ADVA:  (22093,) (35360,) 0.624802036199095
ADV1:  0.0 -0.0016621576146267293 0.012381297006493772 0.09134691434416278 -0.1314641013628028
ADVB:  (17682,) (35360,) 0.5000565

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0010   0.0039   8.4490   2.3854   3.1146
ADVA:  (21431,) (35043,) 0.6115629369631596
ADV1:  0.0008918180164816034 -2.0104676641543864e-05 0.011511723726368796 0.09084822015947897 -0.17441101663375558
ADVB:  (19511,) (35043,) 0.5567731073252861
ADV2:  0.09153825649059415 0.31597769479352866 0.457649700417169 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3513   0.1381   0.6556  46.1133  16.1167  11.3691
***** Episode 27519, Mean R = -20.6  Std R = 7.9  Min R = -42.8
PolicyLoss: 2.01
Policy_Entropy: 0.173
Policy_KL: 0.00746
Policy_SD: 0.552
Steps: 1.17e+04
TotalSteps: 9.68e+06
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.00743


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0013   0.0050   8.4490   2.3854   3.1146
ADVA:  (19204,) (35035,) 0.5481375767090053
ADV1:  0.00021528997694215957 -0.00032347028061613874 0.011210243764204228 0.09084822015947897 -0.17441101663375558
AD

***** Episode 27736, Mean R = -19.5  Std R = 6.3  Min R = -38.4
PolicyLoss: 2.07
Policy_Entropy: 0.174
Policy_KL: 0.00511
Policy_SD: 0.553
Steps: 1.16e+04
TotalSteps: 9.76e+06
VF_0_ExplainedVarNew: 0.976
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.00676


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0017   0.0064   8.4490   2.3854   3.1146
ADVA:  (19867,) (35115,) 0.5657696141250178
ADV1:  0.0008434591338146724 2.7057832085005717e-05 0.009904804878211021 0.0658130431009687 -0.06641790445236165
ADVB:  (21270,) (35115,) 0.6057240495514737
ADV2:  0.18517987239164038 0.3722988252016485 0.4891276103156049 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5407   0.2060   0.8794  46.1133  16.1167  11.3691
***** Episode 27767, Mean R = -19.0  Std R = 6.1  Min R = -32.8
PolicyLoss: 2.16
Policy_Entropy: 0.174
Policy_KL: 0.0056
Policy_SD: 0.549
Steps: 1.18e+04
TotalSteps: 9.77e+06
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00452


ValFun  Gradi

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3809   0.1544   0.7126  46.1133  16.1167  11.3691
***** Episode 27984, Mean R = -19.5  Std R = 6.1  Min R = -35.3
PolicyLoss: 2.23
Policy_Entropy: 0.174
Policy_KL: 0.00519
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 9.86e+06
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00545


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0021   0.0071   8.4490   2.3854   3.1146
ADVA:  (21610,) (35201,) 0.6139030141189171
ADV1:  0.0 -0.0007106230150133761 0.009632839889620725 0.06745448906862828 -0.08678889923757704
ADVB:  (16162,) (35201,) 0.45913468367375926
ADV2:  0.0 0.2497128323741336 0.4195596619780128 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4735   0.1714   0.9083  46.1133  16.1167  11.3691
***** Episode 28015, Mean R = -20.9  Std R = 7.3  Min R = -40.9
PolicyLoss: 1.9
Policy_Entropy: 0.175
Policy_KL: 0.00634
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 9.87e+06
VF_0_ExplainedVarNew

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |   -0.04    0.02 |    0.68    1.87 |   -1.51   -3.12 |    1.36    3.14
w_f      |    0.01    0.00   -0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.04 |    0.04    0.03    0.02
w_rewards |   -0.25 |    0.18 |   -1.16 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.27 |    0.29 |    0.00 |    2.87
seeker_angles |    0.00    0.00 |    0.09    0.08 |   -0.98   -0.99 |    1.00    0.94
cs_angles |  0.0025  0.0049 |  0.0855  0.0818 | -0.9825 -0.9944 |  0.9990  0.9438
optical_flow |  0.0001  0.0002 |  0.0262  0.0244 | -1.0486 -1.2234 |  1.2861  1.0559
v_err    | -0.0102 |  0.0589 | -0.4528 |  0.2029
landing_rewards |    8.94 |    3.08 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.08 |    0.03
tracking_rewards |  -23.87 |    4.96 |  -42.33 |  -14.26
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.1409   2.1048   7.6575  46.1133  16.1167  11.3691
Update Cnt = 920    ET =   1628.2   Stats:  Mean, Std, Min, Max
r_f      |   -2.42  -11.84   -1.44 |  178.95  173.88  201.21 | -380.89 -383.49 -383.61 |  382.85  376.46  373.52
v_f      |    0.00    0.01   -0.00 |    0.04    0.04    0.05 |   -0.11   -0.10   -0.11 |    0.09    0.11    0.12
r_i      |   17.96  -77.50   19.79 |  646.56  670.37  782.76 |-1304.75-1337.54-1353.46 | 1323.31 1288.61 1308.02
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.09    0.10
norm_rf  |    0.24 |    0.10 |    0.04 |    0.57
norm_vf  |    0.08 |    0.02 |    0.03 |    0.13
gs_f     |    1.32 |    1.72 |    0.01 |   12.25
thrust   |    0.00    0.00   -0.00 |    0.68    0.69    0.69 |   -3.45   -3.42   -3.40 |    3.41    3.46    3.43
norm_thrust |    0.92 |    0.75 |    0.00 |    3.46
fuel     |    1.58 |    0.20 |    1.06 |    2.62
rewards  |  -20.52 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.6231   1.7814   7.2274  46.1133  16.1167  11.3691
***** Episode 28821, Mean R = -21.6  Std R = 7.1  Min R = -43.7
PolicyLoss: 2.25
Policy_Entropy: 0.176
Policy_KL: 0.00912
Policy_SD: 0.555
Steps: 1.19e+04
TotalSteps: 1.02e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00477


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0022   0.0080   8.4490   2.3854   3.1146
ADVA:  (19851,) (35173,) 0.5643817701077531
ADV1:  0.00023846423186338166 -0.00046247788067537555 0.009425938653107726 0.053684846817175 -0.0914935424145682
ADVB:  (20337,) (35173,) 0.5781991868762971
ADV2:  0.13505921623666092 0.3576403216726009 0.4857225788420733 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1282   0.4374   2.0456  46.1133  16.1167  11.3691
***** Episode 28852, Mean R = -19.5  Std R = 6.4  Min R = -45.8
PolicyLoss: 2.17
Policy_Entropy: 0.176
Policy_KL: 0.0078
Policy_SD: 0.551
Steps: 1.16e+04
TotalStep

ADVA:  (22151,) (34898,) 0.6347355149292223
ADV1:  0.00036105832012087516 -0.000780088663335414 0.01006093154294747 0.09873872120875049 -0.13766095135781875
ADVB:  (18568,) (34898,) 0.5320648747779242
ADV2:  0.05456870007805438 0.2832700175200639 0.44411459600094744 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5170   0.1805   0.8133  46.1133  16.1167  11.3691
***** Episode 29069, Mean R = -21.0  Std R = 6.6  Min R = -34.7
PolicyLoss: 1.87
Policy_Entropy: 0.175
Policy_KL: 0.00768
Policy_SD: 0.558
Steps: 1.17e+04
TotalSteps: 1.03e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.00569


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0016   0.0058   8.4490   2.3854   3.1146
ADVA:  (20367,) (35059,) 0.5809349952936479
ADV1:  0.0013488582065411077 9.995568649638781e-05 0.009774839211734335 0.05564635152122388 -0.07140385968512214
ADVB:  (21915,) (35059,) 0.6250891354573719
ADV2:  0.21495151863207557 0.3781714966311537 0.4903197709627687

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0043   0.0027   0.0103   8.4490   2.3854   3.1146
ADVA:  (17916,) (35025,) 0.5115203426124197
ADV1:  0.002283576233154885 0.0015548459325469297 0.010571159656744489 0.08954492972937755 -0.2063921082036993
ADVB:  (24585,) (35025,) 0.7019271948608137
ADV2:  0.39063044088154636 0.5227961519549557 0.5542247095008745 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3949   0.1425   0.7538  46.1133  16.1167  11.3691
***** Episode 29317, Mean R = -18.3  Std R = 6.3  Min R = -38.3
PolicyLoss: 2.62
Policy_Entropy: 0.176
Policy_KL: 0.00794
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 1.04e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.00539


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0055   0.0033   0.0130   8.4490   2.3854   3.1146
ADVA:  (21783,) (34989,) 0.6225670925147904
ADV1:  0.0 -0.0011918924197550067 0.01238144694746627 0.1000680317654647 -0.2063921082036993
ADVB:  (17605,) (34989,) 0.5

***** Episode 29534, Mean R = -18.6  Std R = 5.3  Min R = -30.5
PolicyLoss: 2.4
Policy_Entropy: 0.177
Policy_KL: 0.00648
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 1.04e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00419


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0009   0.0034   8.4490   2.3854   3.1146
ADVA:  (18931,) (35116,) 0.539098986217109
ADV1:  0.0 -0.000933847733759738 0.008793970110007588 0.06287616018897324 -0.06627079176385836
ADVB:  (18810,) (35116,) 0.5356532634696435
ADV2:  0.06860849754841644 0.33483104564126065 0.4873079328174656 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8142   0.2683   1.4036  46.1133  16.1167  11.3691
***** Episode 29565, Mean R = -18.9  Std R = 6.1  Min R = -31.5
PolicyLoss: 2.17
Policy_Entropy: 0.177
Policy_KL: 0.00777
Policy_SD: 0.545
Steps: 1.16e+04
TotalSteps: 1.05e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00537


ValFun  Gradients: u/sd/Max/C M

cs_angles |  0.0081  0.0058 |  0.0855  0.0835 | -0.9969 -0.9708 |  0.9931  0.9997
optical_flow | -0.0001 -0.0000 |  0.0239  0.0246 | -1.3186 -1.2759 |  1.0222  1.0589
v_err    | -0.0103 |  0.0596 | -0.4534 |  0.1890
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.07 |    0.04
tracking_rewards |  -23.79 |    5.28 |  -45.79 |  -14.59
steps    |     377 |      20 |     337 |     417
***** Episode 29813, Mean R = -18.5  Std R = 5.4  Min R = -35.6
PolicyLoss: 1.98
Policy_Entropy: 0.177
Policy_KL: 0.00814
Policy_SD: 0.561
Steps: 1.17e+04
TotalSteps: 1.05e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00459


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0018   0.0069   8.4490   2.3854   3.1146
ADVA:  (18900,) (34928,) 0.5411131470453504
ADV1:  0.0004604257173689884 0.00043394141289088405 0.00885009299044549 0.07867834687298031 -0.14574065759485855
ADVB:  (19212,) (34928,) 0.5500458085203848
ADV2:  0.

attitude |   -0.14   -0.01    0.18 |    1.13    0.63    1.75 |   -3.14   -1.56   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.08   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |   -0.01    0.14 |    0.63    1.76 |   -1.43   -3.12 |    1.49    3.11
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.04 |    0.04    0.02    0.03
w_rewards |   -0.27 |    0.20 |   -2.05 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.27 |    0.30 |    0.00 |    3.08
seeker_angles |    0.01    0.00 |    0.09    0.08 |   -0.99   -0.99 |    0.97    0.99
cs_angles |  0.0054  0.0031 |  0.0859  0.0848 | -0.9890 -0.9950 |  0.9687  0.9885
optical_flow | -0.0000  0.0001 |  0.0244  0.0234 | -1.0398 -1.0727 |  1.0963  1.0270
v_err    | -0.0104 |  0.0596 | -0.4548 |  0.1122
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5124   0.1924   0.9616  46.1133  16.1167  11.3691
Update Cnt = 980    ET =   1401.6   Stats:  Mean, Std, Min, Max
r_f      |    5.65    4.12  -37.61 |  177.68  165.97  211.23 | -387.74 -370.18 -389.58 |  399.79  385.99  388.32
v_f      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.09    0.09
r_i      |   12.44   40.34 -115.75 |  639.68  637.03  817.95 |-1300.44-1240.53-1356.48 | 1338.94 1296.76 1373.33
v_i      |   -0.00   -0.00    0.01 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.21 |    0.09 |    0.03 |    0.58
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.54 |    1.99 |    0.01 |   14.00
thrust   |    0.01   -0.00   -0.01 |    0.67    0.68    0.68 |   -3.43   -3.38   -3.41 |    3.44    3.41    3.45
norm_thrust |    0.91 |    0.74 |    0.00 |    3.46
fuel     |    1.61 |    0.19 |    1.17 |    2.28
rewards  |  -18.53 |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4316   0.1364   0.7396  46.1133  16.1167  11.3691
***** Episode 30681, Mean R = -21.8  Std R = 8.8  Min R = -46.6
PolicyLoss: 2.09
Policy_Entropy: 0.179
Policy_KL: 0.0103
Policy_SD: 0.564
Steps: 1.16e+04
TotalSteps: 1.09e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00402


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0036   8.4490   2.3854   3.1146
ADVA:  (19631,) (35102,) 0.5592558828556777
ADV1:  0.00021846810319879444 -0.0005763809464606182 0.010810528387129557 0.08631218223752479 -0.12740882337201942
ADVB:  (19326,) (35102,) 0.5505669192638596
ADV2:  0.08876449917791426 0.32573870437033464 0.484537077142203 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6223   0.2069   1.1576  46.1133  16.1167  11.3691
***** Episode 30712, Mean R = -18.7  Std R = 6.3  Min R = -31.9
PolicyLoss: 2.06
Policy_Entropy: 0.179
Policy_KL: 0.0103
Policy_SD: 0.561
Steps: 1.17e+04
TotalStep

ADVA:  (20755,) (34986,) 0.5932372949179672
ADV1:  0.000645748781062455 4.968219570733681e-05 0.009772626140708555 0.06868099970086233 -0.10858362951904432
ADVB:  (18949,) (34986,) 0.5416166466586635
ADV2:  0.07293350807803335 0.34537208799537233 0.49917589113247873 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6734   0.2247   1.1158  46.1133  16.1167  11.3691
***** Episode 30929, Mean R = -20.5  Std R = 5.3  Min R = -32.2
PolicyLoss: 2.21
Policy_Entropy: 0.179
Policy_KL: 0.00701
Policy_SD: 0.567
Steps: 1.15e+04
TotalSteps: 1.1e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00485


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0018   8.4490   2.3854   3.1146
ADVA:  (18743,) (35063,) 0.534552091948778
ADV1:  0.0008208491835814818 0.0007919229932238388 0.008906106867213753 0.06868099970086233 -0.10858362951904432
ADVB:  (20809,) (35063,) 0.5934746028577133
ADV2:  0.1820061901449339 0.4375608580290572 0.5800191728580217 3.

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0049   0.0028   0.0100   8.4490   2.3854   3.1146
ADVA:  (20237,) (34629,) 0.5843945825752982
ADV1:  0.0011699343438352971 0.0005125968120300178 0.009894370385502437 0.052655885252939266 -0.07307573034861348
ADVB:  (20285,) (34629,) 0.585780704034191
ADV2:  0.20197635897330746 0.42714328171229926 0.5465847338982825 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   9.8635   6.0988  22.7395  46.1133  16.1167  11.3691
***** Episode 31177, Mean R = -19.0  Std R = 7.2  Min R = -38.0
PolicyLoss: 2.53
Policy_Entropy: 0.179
Policy_KL: 0.00637
Policy_SD: 0.553
Steps: 1.16e+04
TotalSteps: 1.11e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00408


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0060   0.0037   0.0140   8.4490   2.3854   3.1146
ADVA:  (19545,) (34819,) 0.5613314569631523
ADV1:  0.0010223269711320728 1.5885958340270057e-05 0.010304469293707999 0.052655885252939266 -0.07307573034861348
ADVB

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0028   8.4490   2.3854   3.1146
ADVA:  (20469,) (35430,) 0.5777307366638442
ADV1:  0.0006848124739880727 -0.00020471570084182046 0.010805614735570996 0.0711531095297504 -0.0695236026465432
ADVB:  (20746,) (35430,) 0.5855489697996048
ADV2:  0.19134139026283564 0.40918831334954725 0.5274173414257888 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9730   0.3610   1.8648  46.1133  16.1167  11.3691
***** Episode 31425, Mean R = -20.5  Std R = 6.3  Min R = -39.9
PolicyLoss: 2.4
Policy_Entropy: 0.182
Policy_KL: 0.0056
Policy_SD: 0.555
Steps: 1.19e+04
TotalSteps: 1.12e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.00429


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0013   0.0051   8.4490   2.3854   3.1146
ADVA:  (21115,) (35433,) 0.5959134140490503
ADV1:  0.00038744074809004755 -0.0004968085153581721 0.010580225354966475 0.059271399224236826 -0.0695236026465432
ADVB: 

cs_angles |  0.0039  0.0032 |  0.0853  0.0831 | -0.9869 -0.9811 |  0.9926  0.9768
optical_flow |  0.0001  0.0002 |  0.0249  0.0234 | -1.2927 -0.9248 |  1.0291  1.2172
v_err    | -0.0102 |  0.0588 | -0.4536 |  0.1192
landing_rewards |    9.16 |    2.77 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.08 |    0.03
tracking_rewards |  -23.32 |    5.18 |  -47.06 |  -14.50
steps    |     379 |      20 |     334 |     418
***** Episode 31673, Mean R = -17.4  Std R = 6.3  Min R = -38.1
PolicyLoss: 1.97
Policy_Entropy: 0.183
Policy_KL: 0.00595
Policy_SD: 0.551
Steps: 1.18e+04
TotalSteps: 1.12e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00492


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0009   0.0034   8.4490   2.3854   3.1146
ADVA:  (19496,) (35285,) 0.5525294034292192
ADV1:  0.00023800682013531624 -0.000401345241567057 0.009317392934956271 0.07312191591261769 -0.11681577761389245
ADVB:  (19918,) (35285,) 0.564489159699589
ADV2:  0.

attitude |    0.12    0.04   -0.21 |    1.25    0.68    1.86 |   -3.14   -1.56   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |    0.03   -0.25 |    0.68    1.86 |   -1.56   -3.14 |    1.51    3.12
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.03   -0.03   -0.04 |    0.04    0.02    0.02
w_rewards |   -0.33 |    0.18 |   -1.09 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.28 |    0.31 |    0.00 |    2.61
seeker_angles |    0.01    0.00 |    0.09    0.09 |   -1.00   -1.00 |    1.00    1.00
cs_angles |  0.0063  0.0049 |  0.0884  0.0882 | -0.9962 -0.9995 |  0.9955  0.9975
optical_flow | -0.0000  0.0002 |  0.0246  0.0232 | -1.4575 -1.0765 |  1.0865  1.4563
v_err    | -0.0102 |  0.0590 | -0.4539 |  0.1324
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6285   0.2308   1.1842  46.1133  16.1167  11.3691
Update Cnt = 1040    ET =   1360.1   Stats:  Mean, Std, Min, Max
r_f      |  -10.58  -11.83    7.34 |  186.15  163.25  210.92 | -398.52 -384.13 -391.17 |  390.44  386.81  382.44
v_f      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.09    0.11    0.10
r_i      |  -10.07  -36.57   18.59 |  700.65  625.91  779.37 |-1348.23-1334.55-1308.54 | 1340.68 1326.59 1294.39
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.08   -0.10 |    0.10    0.10    0.09
norm_rf  |    0.22 |    0.09 |    0.04 |    0.52
norm_vf  |    0.08 |    0.02 |    0.04 |    0.13
gs_f     |    1.35 |    1.82 |    0.01 |   16.22
thrust   |   -0.00    0.00    0.00 |    0.70    0.70    0.69 |   -3.39   -3.40   -3.40 |    3.46    3.36    3.45
norm_thrust |    0.94 |    0.75 |    0.00 |    3.46
fuel     |    1.62 |    0.21 |    1.10 |    2.82
rewards  |  -19.50 

ADVA:  (18618,) (35031,) 0.5314721246895606
ADV1:  0.0 -0.0014025479275622148 0.008721993891564066 0.0625902234611675 -0.07438802954494594
ADVB:  (17054,) (35031,) 0.4868259541548914
ADV2:  0.0 0.2974939985764906 0.4807046610277254 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6721   0.2273   1.1698  46.1133  16.1167  11.3691
***** Episode 32541, Mean R = -19.8  Std R = 6.1  Min R = -34.3
PolicyLoss: 2.06
Policy_Entropy: 0.186
Policy_KL: 0.00788
Policy_SD: 0.552
Steps: 1.17e+04
TotalSteps: 1.16e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00588


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0016   0.0058   8.4490   2.3854   3.1146
ADVA:  (20799,) (35045,) 0.5934940790412327
ADV1:  0.0 -0.0015093375909180952 0.01005713287399007 0.0625902234611675 -0.07438802954494594
ADVB:  (16839,) (35045,) 0.4804965044942217
ADV2:  0.0 0.289318474656069 0.4662947171742876 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6655   0.

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0036   8.4490   2.3854   3.1146
ADVA:  (21292,) (35071,) 0.6071112885289841
ADV1:  0.0001974663447389625 -0.0007028968656371231 0.010004989216023734 0.06912792234247023 -0.0756749574138228
ADVB:  (17904,) (35071,) 0.5105072567078213
ADV2:  0.01696858091377707 0.2820975628975306 0.4391213023958085 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.3040   3.2506  12.2189  46.1133  16.1167  11.3691
***** Episode 32789, Mean R = -19.4  Std R = 7.5  Min R = -37.3
PolicyLoss: 1.87
Policy_Entropy: 0.186
Policy_KL: 0.00971
Policy_SD: 0.549
Steps: 1.16e+04
TotalSteps: 1.17e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00407


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0012   0.0046   8.4490   2.3854   3.1146
ADVA:  (18789,) (35017,) 0.5365679527086844
ADV1:  0.0015546544312779855 0.0011597994265185134 0.009180308673958961 0.06912792234247023 -0.07984170554987421
ADVB: 

***** Episode 33006, Mean R = -18.9  Std R = 4.7  Min R = -27.8
PolicyLoss: 3.42
Policy_Entropy: 0.187
Policy_KL: 0.0116
Policy_SD: 0.549
Steps: 1.18e+04
TotalSteps: 1.18e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00465


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0014   0.0055   8.4490   2.3854   3.1146
ADVA:  (18189,) (35614,) 0.5107261189419892
ADV1:  -0.0012912738645383844 -0.0020668908728191074 0.007255758702711951 0.07386156603285904 -0.06538639686261605
ADVB:  (13988,) (35614,) 0.3927668894255068
ADV2:  0.0 0.25287960690202993 0.47550705603997695 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.4167   1.3233   6.4099  46.1133  16.1167  11.3691
***** Episode 33037, Mean R = -18.9  Std R = 6.6  Min R = -38.3
PolicyLoss: 2.18
Policy_Entropy: 0.186
Policy_KL: 0.00864
Policy_SD: 0.546
Steps: 1.21e+04
TotalSteps: 1.18e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00396


ValFun  Gradients: u/sd/Ma

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1979   0.5141   2.4689  46.1133  16.1167  11.3691
***** Episode 33254, Mean R = -18.1  Std R = 5.0  Min R = -27.0
PolicyLoss: 2.43
Policy_Entropy: 0.186
Policy_KL: 0.00757
Policy_SD: 0.539
Steps: 1.16e+04
TotalSteps: 1.18e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00435


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0014   0.0055   8.4490   2.3854   3.1146
ADVA:  (19472,) (34977,) 0.5567086942848157
ADV1:  0.0 -0.0010770143672569194 0.00727761921967473 0.049438226459841084 -0.0645335496613553
ADVB:  (16434,) (34977,) 0.469851616776739
ADV2:  0.0 0.2662334482275625 0.43569223416437525 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8111   0.3074   1.6972  46.1133  16.1167  11.3691
***** Episode 33285, Mean R = -18.0  Std R = 4.9  Min R = -34.1
PolicyLoss: 1.91
Policy_Entropy: 0.187
Policy_KL: 0.00811
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 1.19e+07
VF_0_ExplainedVarNew: 0

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.08   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |    0.02    0.03 |    0.69    1.93 |   -1.48   -3.12 |    1.50    3.14
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.04 |    0.04    0.02    0.02
w_rewards |   -0.28 |    0.19 |   -0.89 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.26 |    0.29 |    0.00 |    2.47
seeker_angles |    0.00    0.00 |    0.09    0.08 |   -0.99   -0.99 |    1.00    0.95
cs_angles |  0.0021  0.0045 |  0.0868  0.0823 | -0.9897 -0.9872 |  0.9958  0.9501
optical_flow |  0.0001  0.0001 |  0.0241  0.0239 | -1.0425 -0.9961 |  1.1007  1.5421
v_err    | -0.0108 |  0.0594 | -0.4620 |  0.1119
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.07 |    0.04
tracking_rewards |  -23.42 |    5.28 |  -50.64 |  -14.66
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5238   0.2110   1.0576  46.1133  16.1167  11.3691
Update Cnt = 1090    ET =   1378.6   Stats:  Mean, Std, Min, Max
r_f      |   -2.90    0.95   14.83 |  185.74  165.17  203.99 | -390.63 -375.07 -370.80 |  370.55  375.20  385.79
v_f      |    0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.11    0.10
r_i      |   -8.16   20.07   67.87 |  714.07  650.76  747.36 |-1331.02-1319.11-1349.73 | 1299.24 1212.19 1260.62
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.08   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.20 |    0.09 |    0.03 |    0.46
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    2.05 |   16.64 |    0.02 |  293.36
thrust   |    0.00   -0.00   -0.00 |    0.68    0.68    0.67 |   -3.44   -3.46   -3.31 |    3.35    3.34    3.34
norm_thrust |    0.90 |    0.74 |    0.00 |    3.46
fuel     |    1.57 |    0.19 |    1.08 |    2.32
rewards  |  -19.03 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9760   0.3558   1.6583  46.1133  16.1167  11.3691
***** Episode 34091, Mean R = -18.3  Std R = 5.2  Min R = -31.6
PolicyLoss: 2.33
Policy_Entropy: 0.188
Policy_KL: 0.00684
Policy_SD: 0.538
Steps: 1.16e+04
TotalSteps: 1.22e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00441


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0017   8.4490   2.3854   3.1146
ADVA:  (20444,) (35012,) 0.5839140865988803
ADV1:  0.00029859612232438146 2.6796040434967602e-05 0.009916694359742217 0.08450920876571189 -0.15287242777022236
ADVB:  (18993,) (35012,) 0.5424711527476294
ADV2:  0.07903437345850783 0.36241875219879577 0.5300838162517347 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  12.6746   8.8533  38.0132  46.1133  16.1167  11.3691
***** Episode 34122, Mean R = -19.0  Std R = 5.1  Min R = -28.4
PolicyLoss: 2.26
Policy_Entropy: 0.188
Policy_KL: 0.0127
Policy_SD: 0.544
Steps: 1.17e+04
TotalS

ADVA:  (19622,) (35489,) 0.5529037166445941
ADV1:  0.0 -0.0004966353786293748 0.008745372612942235 0.080290428297497 -0.07692064491179129
ADVB:  (18873,) (35489,) 0.5317985854771902
ADV2:  0.06394396903158873 0.3450885593783687 0.5139275363671615 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7783   0.2876   1.3919  46.1133  16.1167  11.3691
***** Episode 34339, Mean R = -18.2  Std R = 6.0  Min R = -34.6
PolicyLoss: 2.18
Policy_Entropy: 0.188
Policy_KL: 0.00671
Policy_SD: 0.541
Steps: 1.2e+04
TotalSteps: 1.23e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00357


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0007   8.4490   2.3854   3.1146
ADVA:  (20439,) (35438,) 0.5767537671426153
ADV1:  0.0012384623486916026 0.00035968758475514787 0.008968011687731432 0.04751335277476676 -0.08242260566742088
ADVB:  (21838,) (35438,) 0.6162311642869236
ADV2:  0.22037449680253496 0.3835834079711009 0.4743757990579898 3.0 0.0
Policy  Grad

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0020   8.4490   2.3854   3.1146
ADVA:  (20436,) (35481,) 0.5759702375919507
ADV1:  0.0013809842549633005 0.0004306540824883792 0.00952467890234613 0.056179619435506534 -0.10611684429290935
ADVB:  (22346,) (35481,) 0.6298018657873228
ADV2:  0.2297261435106073 0.3957131194249755 0.5021625157277249 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5774   0.1824   0.9878  46.1133  16.1167  11.3691
***** Episode 34587, Mean R = -20.1  Std R = 6.6  Min R = -33.8
PolicyLoss: 2.12
Policy_Entropy: 0.189
Policy_KL: 0.00577
Policy_SD: 0.546
Steps: 1.19e+04
TotalSteps: 1.24e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.00378


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0014   0.0051   8.4490   2.3854   3.1146
ADVA:  (22924,) (35599,) 0.643950672771707
ADV1:  0.0004471455883101119 -0.0006407573409321542 0.010157392540253133 0.06876468006749081 -0.10611684429290935
ADVB:  

***** Episode 34804, Mean R = -19.3  Std R = 7.4  Min R = -33.5
PolicyLoss: 2.11
Policy_Entropy: 0.189
Policy_KL: 0.00525
Policy_SD: 0.543
Steps: 1.16e+04
TotalSteps: 1.24e+07
VF_0_ExplainedVarNew: 0.979
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00309


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0016   0.0058   8.4490   2.3854   3.1146
ADVA:  (21670,) (34912,) 0.6207034830430798
ADV1:  0.0 -0.001614987297347789 0.011408095644167443 0.08632027233525269 -0.07803717882298827
ADVB:  (17528,) (34912,) 0.5020623281393217
ADV2:  0.004318059730856662 0.26159071586799365 0.39695113416862965 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9375   0.3812   1.7778  46.1133  16.1167  11.3691
***** Episode 34835, Mean R = -21.2  Std R = 7.8  Min R = -37.4
PolicyLoss: 1.75
Policy_Entropy: 0.189
Policy_KL: 0.0063
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 1.24e+07
VF_0_ExplainedVarNew: 0.977
VF_0_ExplainedVarOld: 0.974
VF_0_Loss : 0.00363


ValFun  Gradients: u/sd/Max/

seeker_angles |    0.01    0.01 |    0.09    0.08 |   -0.99   -0.96 |    0.95    0.98
cs_angles |  0.0055  0.0053 |  0.0899  0.0806 | -0.9912 -0.9615 |  0.9527  0.9837
optical_flow | -0.0000  0.0001 |  0.0216  0.0223 | -1.0424 -0.9914 |  1.3220  1.1456
v_err    | -0.0106 |  0.0596 | -0.4529 |  0.1239
landing_rewards |    8.87 |    3.16 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.06 |    0.03
tracking_rewards |  -23.41 |    6.09 |  -78.70 |  -14.31
steps    |     378 |      20 |     337 |     418
***** Episode 35083, Mean R = -19.5  Std R = 7.7  Min R = -46.3
PolicyLoss: 2.21
Policy_Entropy: 0.19
Policy_KL: 0.00582
Policy_SD: 0.534
Steps: 1.16e+04
TotalSteps: 1.25e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00437


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0012   0.0045   8.4490   2.3854   3.1146
ADVA:  (18675,) (35234,) 0.5300278140432537
ADV1:  0.0 -0.0008208444989028733 0.009837150078204734 0.05669054401889012 -0.154

thrust   |    0.01    0.00    0.00 |    0.67    0.67    0.67 |   -3.40   -3.44   -3.44 |    3.37    3.44    3.45
norm_thrust |    0.89 |    0.74 |    0.00 |    3.46
fuel     |    1.55 |    0.20 |    1.15 |    2.35
rewards  |  -19.14 |    6.46 |  -51.97 |   -7.53
fuel_rewards |   -4.43 |    0.57 |   -6.74 |   -3.29
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    2.91 |   12.85 |    0.01 |  292.98
norm_af  |    1.86 |    0.88 |    0.11 |    3.31
norm_wf  |    0.02 |    0.01 |    0.00 |    0.04
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |    0.01   -0.02   -0.13 |    1.27    0.64    1.95 |   -3.14   -1.55   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.07   -0.05 |    0.08    0.05    0.07
a_f      |   -0.03   -0.18 |    0.64

ADVA:  (21962,) (34870,) 0.62982506452538
ADV1:  0.0011112819930474416 -0.0005443985628216679 0.01135225658840264 0.11131152679330591 -0.15613698429041867
ADVB:  (20648,) (34870,) 0.5921422426154287
ADV2:  0.1506820842731857 0.3290677412570465 0.4309380507014756 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6924   0.2839   1.3626  46.1133  16.1167  11.3691
***** Episode 35672, Mean R = -17.7  Std R = 6.1  Min R = -33.0
PolicyLoss: 1.86
Policy_Entropy: 0.191
Policy_KL: 0.00696
Policy_SD: 0.537
Steps: 1.16e+04
TotalSteps: 1.28e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.975
VF_0_Loss : 0.00439


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0013   8.4490   2.3854   3.1146
ADVA:  (21839,) (34506,) 0.6329044224192893
ADV1:  0.0016189738825362733 -6.602866480839853e-05 0.011224866579403891 0.11131152679330591 -0.15613698429041867
ADVB:  (21556,) (34506,) 0.6247029502115574
ADV2:  0.19840400113110995 0.36086428117847014 0.45222415822174333 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0023   0.0087   8.4490   2.3854   3.1146
ADVA:  (19810,) (35151,) 0.5635686040226452
ADV1:  4.826394532221612e-05 -0.0009644467124932601 0.01067224328420255 0.07629845878444619 -0.07750120445548886
ADVB:  (19953,) (35151,) 0.5676367670905522
ADV2:  0.12490947321922068 0.35017073157657913 0.4702937805328593 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9829   0.3959   1.7838  46.1133  16.1167  11.3691
***** Episode 35920, Mean R = -17.3  Std R = 5.6  Min R = -29.2
PolicyLoss: 2.07
Policy_Entropy: 0.191
Policy_KL: 0.00754
Policy_SD: 0.535
Steps: 1.16e+04
TotalSteps: 1.29e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00511


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0008   8.4490   2.3854   3.1146
ADVA:  (21700,) (35052,) 0.6190802236676937
ADV1:  0.0009204502455298549 -0.0004161229386938201 0.010770383334772773 0.07629845878444619 -0.07750120445548886
ADVB

***** Episode 36137, Mean R = -20.8  Std R = 9.1  Min R = -45.9
PolicyLoss: 1.62
Policy_Entropy: 0.191
Policy_KL: 0.00654
Policy_SD: 0.548
Steps: 1.17e+04
TotalSteps: 1.29e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.00564


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0017   0.0066   8.4490   2.3854   3.1146
ADVA:  (22411,) (35135,) 0.6378539917461221
ADV1:  0.00039058257335039537 -0.0009902961932048693 0.0114196189282952 0.08196599638474389 -0.06722389767036374
ADVB:  (18923,) (35135,) 0.5385797637683222
ADV2:  0.05889384322751252 0.3048566275292813 0.4456809987630553 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4485   0.1379   0.7510  46.1133  16.1167  11.3691
***** Episode 36168, Mean R = -19.5  Std R = 7.5  Min R = -36.1
PolicyLoss: 1.89
Policy_Entropy: 0.192
Policy_KL: 0.00781
Policy_SD: 0.545
Steps: 1.14e+04
TotalSteps: 1.29e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00373


ValFun  Gradi

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9282   0.7466   3.2330  46.1133  16.1167  11.3691
***** Episode 36385, Mean R = -21.8  Std R = 6.0  Min R = -33.3
PolicyLoss: 2.01
Policy_Entropy: 0.191
Policy_KL: 0.00792
Policy_SD: 0.55
Steps: 1.16e+04
TotalSteps: 1.3e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.00555


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0038   0.0020   0.0075   8.4490   2.3854   3.1146
ADVA:  (21249,) (35324,) 0.6015456913146869
ADV1:  0.0 -0.0019448599422427282 0.011629554455216368 0.08330784321502227 -0.06419657651475888
ADVB:  (18658,) (35324,) 0.5281961272789039
ADV2:  0.04848756103588171 0.2749661422785105 0.4018598506276658 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7430   0.9099   4.0285  46.1133  16.1167  11.3691
***** Episode 36416, Mean R = -20.4  Std R = 7.3  Min R = -41.1
PolicyLoss: 1.75
Policy_Entropy: 0.19
Policy_KL: 0.00797
Policy_SD: 0.546
Steps: 1.18e+04
TotalSteps: 1.3e+07
VF_0_Expl

v_err    | -0.0103 |  0.0596 | -0.4520 |  0.1432
landing_rewards |    9.00 |    3.00 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.03
tracking_rewards |  -23.01 |    5.06 |  -51.77 |  -14.65
steps    |     379 |      19 |     335 |     419
***** Episode 36633, Mean R = -18.3  Std R = 6.8  Min R = -37.3
PolicyLoss: 1.98
Policy_Entropy: 0.19
Policy_KL: 0.0273
Policy_SD: 0.549
Steps: 1.17e+04
TotalSteps: 1.31e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00513


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0036   8.4490   2.3854   3.1146
ADVA:  (18456,) (35299,) 0.5228476727386044
ADV1:  0.0 -0.0008702935628893991 0.00848137881906273 0.07910605080414368 -0.07178031448468636
ADVB:  (18547,) (35299,) 0.5254256494518258
ADV2:  0.04907762899912938 0.3289476931764084 0.497900853727147 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.9221   3.4280  14.6017  46.1133  16.1167  11.9545
***** Episode 36664, 

attitude |   -0.11   -0.06   -0.04 |    1.24    0.66    1.88 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.10   -0.07   -0.05 |    0.06    0.05    0.07
a_f      |   -0.06   -0.07 |    0.65    1.88 |   -1.45   -3.12 |    1.30    3.12
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.04    0.02    0.02
w_rewards |   -0.31 |    0.24 |   -2.89 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.26 |    0.29 |    0.00 |    2.40
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.95   -0.97 |    1.00    0.99
cs_angles |  0.0041  0.0014 |  0.0840  0.0843 | -0.9487 -0.9731 |  0.9998  0.9866
optical_flow |  0.0001  0.0001 |  0.0219  0.0224 | -1.0370 -1.1001 |  1.0432  1.0095
v_err    | -0.0104 |  0.0600 | -0.4521 |  0.1357
landing_rewards |    8.94 |    3.08 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4275   0.4791   2.3236  46.1133  16.1167  11.9545
Update Cnt = 1200    ET =   1245.1   Stats:  Mean, Std, Min, Max
r_f      |   -0.01   -3.49   -2.36 |  201.11  162.26  193.79 | -385.43 -380.65 -391.52 |  377.31  372.91  385.47
v_f      |    0.00    0.00   -0.00 |    0.05    0.04    0.05 |   -0.11   -0.12   -0.10 |    0.11    0.11    0.10
r_i      |  -27.37  -12.89   12.84 |  720.21  645.01  738.37 |-1302.69-1363.24-1305.06 | 1295.97 1349.18 1283.51
v_i      |    0.00    0.00   -0.00 |    0.05    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.09    0.10    0.10
norm_rf  |    0.20 |    0.08 |    0.02 |    0.42
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.22 |    1.89 |    0.01 |   15.16
thrust   |   -0.00    0.00   -0.00 |    0.67    0.69    0.68 |   -3.33   -3.42   -3.43 |    3.46    3.44    3.31
norm_thrust |    0.92 |    0.74 |    0.00 |    3.46
fuel     |    1.59 |    0.20 |    1.17 |    2.31
rewards  |  -19.54 

ADVA:  (21194,) (34995,) 0.6056293756250893
ADV1:  0.0008319475644122075 -9.82949839820844e-05 0.009389883995674057 0.05684175178109607 -0.07220124025863195
ADVB:  (19655,) (34995,) 0.5616516645235033
ADV2:  0.11087530023809468 0.3397043335535562 0.47009644015556307 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0611   0.3518   1.6848  46.1133  16.1167  11.9545
***** Episode 37501, Mean R = -17.5  Std R = 5.9  Min R = -30.4
PolicyLoss: 2.02
Policy_Entropy: 0.191
Policy_KL: 0.0071
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 1.35e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00403


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0010   0.0038   8.4490   2.3854   3.1146
ADVA:  (19601,) (34890,) 0.5617942103754657
ADV1:  0.001465363970458369 0.00045294884256420547 0.009502130016279507 0.05684175178109607 -0.07220124025863195
ADVB:  (22500,) (34890,) 0.6448839208942391
ADV2:  0.257727568574866 0.4281543227979519 0.5081042407161118 3.

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0034   8.4490   2.3854   3.1146
ADVA:  (18725,) (35045,) 0.5343130261092881
ADV1:  0.0007889000404421483 0.00027481912215665397 0.008703325753299712 0.09064370613163203 -0.07104658421482649
ADVB:  (21467,) (35045,) 0.612555286060779
ADV2:  0.225879746930523 0.4268273411516525 0.5312116748676826 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8783   0.3829   1.6197  46.1133  16.1167  11.9545
***** Episode 37749, Mean R = -18.2  Std R = 7.1  Min R = -38.3
PolicyLoss: 2.34
Policy_Entropy: 0.19
Policy_KL: 0.00735
Policy_SD: 0.554
Steps: 1.17e+04
TotalSteps: 1.35e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00361


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0012   0.0042   8.4490   2.3854   3.1146
ADVA:  (19675,) (35069,) 0.5610368131398101
ADV1:  0.00046507932091775783 2.9247787323008177e-05 0.008968270417646529 0.09064370613163203 -0.07104658421482649
ADVB:  

***** Episode 37966, Mean R = -18.3  Std R = 6.6  Min R = -40.8
PolicyLoss: 2.14
Policy_Entropy: 0.192
Policy_KL: 0.0082
Policy_SD: 0.546
Steps: 1.19e+04
TotalSteps: 1.36e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.0036


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0030   8.4490   2.3854   3.1146
ADVA:  (19407,) (35340,) 0.549151103565365
ADV1:  0.001066164556689857 0.00023182187292492066 0.008962142556279955 0.051325070791894356 -0.11443730702295052
ADVB:  (22060,) (35340,) 0.624221844934918
ADV2:  0.23674295635353093 0.4054984901104165 0.4999473864974306 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7159   0.2229   1.1253  46.1133  16.1167  11.9545
***** Episode 37997, Mean R = -17.9  Std R = 4.7  Min R = -28.4
PolicyLoss: 2.15
Policy_Entropy: 0.191
Policy_KL: 0.00492
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 1.36e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00304


ValFun  Gradient

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4489   0.1278   0.6643  46.1133  16.1167  11.9545
***** Episode 38214, Mean R = -20.3  Std R = 6.0  Min R = -30.3
PolicyLoss: 1.82
Policy_Entropy: 0.192
Policy_KL: 0.00686
Policy_SD: 0.555
Steps: 1.16e+04
TotalSteps: 1.37e+07
VF_0_ExplainedVarNew: 0.973
VF_0_ExplainedVarOld: 0.969
VF_0_Loss : 0.00413


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0011   0.0047   8.4490   2.3854   3.1146
ADVA:  (20234,) (34859,) 0.5804526807997935
ADV1:  0.0017523803958580664 -0.00011648994810394158 0.012731921705721544 0.1306527408382352 -0.1399123243510103
ADVB:  (22559,) (34859,) 0.647149946929057
ADV2:  0.2788190847517464 0.410383841352161 0.4771284040795909 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8629   0.3788   1.4835  46.1133  16.1167  11.9545
***** Episode 38245, Mean R = -21.0  Std R = 7.2  Min R = -35.8
PolicyLoss: 2.13
Policy_Entropy: 0.191
Policy_KL: 0.00808
Policy_SD: 0.559
Steps: 1.16e+04
TotalSteps:

attitude |   -0.00   -0.01    0.08 |    1.21    0.67    1.87 |   -3.14   -1.56   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.08   -0.07   -0.05 |    0.10    0.05    0.07
a_f      |   -0.02    0.05 |    0.65    1.87 |   -1.39   -3.07 |    1.44    3.12
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.02 |    0.10    0.03    0.04
w_rewards |   -0.29 |    0.40 |   -6.59 |    0.00
w_penalty |   -0.32 |    5.67 | -100.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.26 |    0.29 |    0.00 |    2.49
seeker_angles |    0.01   -0.00 |    0.09    0.09 |   -0.99   -0.97 |    1.00    1.00
cs_angles |  0.0093 -0.0003 |  0.0851  0.0873 | -0.9880 -0.9738 |  0.9979  0.9979
optical_flow |  0.0000  0.0001 |  0.0214  0.0216 | -1.0749 -1.1399 |  0.8194  1.3085
v_err    | -0.0110 |  0.0640 | -1.1200 |  0.1179
landing_rewards |    8.97 |    3.04 |    0.00 |   10.00
landing_margin |    3

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0554   0.4112   2.0003  46.1133  16.1167  11.9545
Update Cnt = 1250    ET =   1346.5   Stats:  Mean, Std, Min, Max
r_f      |   -9.78   -1.36    8.36 |  174.43  175.05  203.71 | -381.55 -390.89 -396.23 |  389.24  368.30  385.42
v_f      |    0.00    0.00   -0.00 |    0.04    0.05    0.05 |   -0.09   -0.10   -0.12 |    0.09    0.10    0.10
r_i      |  -52.48    0.83   39.72 |  649.61  681.12  774.53 |-1258.91-1254.90-1375.71 | 1245.26 1254.20 1277.80
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.08   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.18 |    0.07 |    0.03 |    0.45
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs_f     |    1.50 |    2.65 |    0.01 |   27.80
thrust   |    0.00    0.00   -0.00 |    0.67    0.67    0.66 |   -3.37   -3.45   -3.37 |    3.46    3.45    3.30
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.57 |    0.18 |    1.10 |    2.51
rewards  |  -18.04 

ADVA:  (20580,) (34953,) 0.5887906617457729
ADV1:  0.0001752214058530913 -0.0005390256260742257 0.009365856198692726 0.07527890521070402 -0.07875171879033471
ADVB:  (19132,) (34953,) 0.5473636025519985
ADV2:  0.07508805859620664 0.3101502017858178 0.45811462700618655 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0395   0.4398   2.0062  46.1133  16.1167  11.9545
***** Episode 39051, Mean R = -18.8  Std R = 6.3  Min R = -35.8
PolicyLoss: 1.88
Policy_Entropy: 0.193
Policy_KL: 0.00834
Policy_SD: 0.561
Steps: 1.17e+04
TotalSteps: 1.4e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00394


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0012   8.4490   2.3854   3.1146
ADVA:  (21432,) (34912,) 0.6138863428047663
ADV1:  0.0005942826088312092 -0.0003377775288062548 0.010150072907343993 0.05538563818383202 -0.13760829256797702
ADVB:  (19673,) (34912,) 0.5635025206232814
ADV2:  0.09572587779481387 0.3025113915504821 0.429522710197372

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0007   8.4490   2.3854   3.1146
ADVA:  (20850,) (34871,) 0.5979180407788707
ADV1:  0.0006265106065228921 -0.000261710169739812 0.01016147646286104 0.07664491420747616 -0.09445943683093216
ADVB:  (19517,) (34871,) 0.5596914341429842
ADV2:  0.1097151115376372 0.3375158868636736 0.4625870330018095 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9966   0.3987   1.9467  46.1133  16.1167  11.9545
***** Episode 39299, Mean R = -16.7  Std R = 7.5  Min R = -37.6
PolicyLoss: 2
Policy_Entropy: 0.192
Policy_KL: 0.0142
Policy_SD: 0.554
Steps: 1.15e+04
TotalSteps: 1.41e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00474


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0008   8.4490   2.3854   3.1146
ADVA:  (18434,) (34801,) 0.5296974224878596
ADV1:  0.001223445365624667 0.0007250394552791295 0.009681828813426474 0.07664491420747616 -0.09445943683093216
ADVB:  (22556

***** Episode 39516, Mean R = -19.9  Std R = 7.3  Min R = -39.1
PolicyLoss: 1.83
Policy_Entropy: 0.193
Policy_KL: 0.00647
Policy_SD: 0.559
Steps: 1.18e+04
TotalSteps: 1.42e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00509


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0001   0.0006   8.4490   2.3854   3.1146
ADVA:  (20720,) (35232,) 0.5881017257039055
ADV1:  0.0004182355063498469 -0.0010687931454383753 0.010820191368199521 0.08796864644451724 -0.09548646246932779
ADVB:  (20556,) (35232,) 0.5834468664850136
ADV2:  0.1548402733380453 0.3433960449673531 0.4333817871860371 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4914   0.1705   0.9501  46.1133  16.1167  11.9545
***** Episode 39547, Mean R = -20.2  Std R = 9.1  Min R = -40.6
PolicyLoss: 1.95
Policy_Entropy: 0.194
Policy_KL: 0.00764
Policy_SD: 0.56
Steps: 1.19e+04
TotalSteps: 1.42e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00378


ValFun  Gradi

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5941   0.2045   1.0280  46.1133  16.1167  11.9545
***** Episode 39764, Mean R = -19.3  Std R = 6.4  Min R = -37.5
PolicyLoss: 1.95
Policy_Entropy: 0.193
Policy_KL: 0.00642
Policy_SD: 0.561
Steps: 1.18e+04
TotalSteps: 1.43e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00336


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0012   8.4490   2.3854   3.1146
ADVA:  (20980,) (35050,) 0.5985734664764621
ADV1:  0.0019422294913082217 0.00021955328176230854 0.01099172084728784 0.05437957848416114 -0.09358247654651508
ADVB:  (22575,) (35050,) 0.6440798858773181
ADV2:  0.26362567324808256 0.39413168836790796 0.4477259249372267 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7646   0.2766   1.4286  46.1133  16.1167  11.9545
***** Episode 39795, Mean R = -19.6  Std R = 8.0  Min R = -47.5
PolicyLoss: 2.03
Policy_Entropy: 0.194
Policy_KL: 0.00595
Policy_SD: 0.556
Steps: 1.17e+04
TotalSt

seeker_angles |    0.00    0.00 |    0.08    0.08 |   -1.00   -0.97 |    1.00    0.99
cs_angles |  0.0039  0.0050 |  0.0825  0.0838 | -0.9953 -0.9741 |  0.9966  0.9949
optical_flow |  0.0001  0.0002 |  0.0215  0.0236 | -0.9999 -1.0016 |  0.9249  1.2806
v_err    | -0.0108 |  0.0600 | -0.4526 |  0.1408
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.02
tracking_rewards |  -22.58 |    5.20 |  -54.09 |  -13.11
steps    |     379 |      20 |     337 |     420
***** Episode 40043, Mean R = -18.5  Std R = 7.8  Min R = -52.9
PolicyLoss: 2.26
Policy_Entropy: 0.194
Policy_KL: 0.0089
Policy_SD: 0.55
Steps: 1.17e+04
TotalSteps: 1.44e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00378


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0003   0.0017   8.4490   2.3854   3.1146
ADVA:  (17721,) (35107,) 0.5047711282650184
ADV1:  0.0011211217688721579 0.0006618562129506554 0.008485877170362893 0.06011000

attitude |   -0.05    0.03    0.02 |    1.31    0.69    1.95 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |    0.03   -0.06 |    0.68    1.95 |   -1.46   -3.14 |    1.54    3.14
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.02 |    0.04    0.02    0.01
w_rewards |   -0.33 |    0.20 |   -1.07 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.26 |    0.29 |    0.00 |    2.37
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.99 |    0.98    0.99
cs_angles |  0.0032  0.0026 |  0.0847  0.0808 | -0.9833 -0.9863 |  0.9830  0.9923
optical_flow |  0.0002  0.0000 |  0.0226  0.0227 | -1.0634 -1.0780 |  1.1959  1.1155
v_err    | -0.0107 |  0.0598 | -0.4533 |  0.1060
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.4407   0.1785   0.8063  82.6183  56.9741  26.3264
Update Cnt = 1310    ET =   1539.1   Stats:  Mean, Std, Min, Max
r_f      |   -5.68  -18.33   -4.83 |  196.29  167.67  197.65 | -392.15 -376.21 -392.30 |  376.84  383.15  369.35
v_f      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.11 |    0.09    0.10    0.10
r_i      |  -19.88  -42.23   -1.50 |  686.44  641.76  770.98 |-1345.10-1306.50-1277.13 | 1284.61 1218.80 1299.06
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.08    0.10
norm_rf  |    0.20 |    0.08 |    0.03 |    0.51
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.35 |    1.90 |    0.01 |   15.41
thrust   |    0.00    0.00    0.00 |    0.67    0.68    0.67 |   -3.45   -3.45   -3.34 |    3.40    3.45    3.44
norm_thrust |    0.90 |    0.73 |    0.00 |    3.46
fuel     |    1.56 |    0.18 |    1.11 |    2.10
rewards  |  -17.84 

ADVA:  (18311,) (34917,) 0.5244150413838531
ADV1:  0.0013080564670723445 0.0002683062635987512 0.009159593386448324 0.05730539578846744 -0.11576288231478749
ADVB:  (23215,) (34917,) 0.6648623879485638
ADV2:  0.28209623047860194 0.4084771897850215 0.47021532122478954 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5934   0.2142   1.0167  82.6183  56.9741  26.3264
***** Episode 40911, Mean R = -17.7  Std R = 5.3  Min R = -28.7
PolicyLoss: 2.02
Policy_Entropy: 0.196
Policy_KL: 0.00551
Policy_SD: 0.544
Steps: 1.15e+04
TotalSteps: 1.47e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00315


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0013   0.0046   8.4490   2.3854   3.1146
ADVA:  (17523,) (35055,) 0.49987163029525034
ADV1:  0.0006979060024412016 -0.00011366782005104577 0.009205251477194042 0.06772586051438073 -0.09399576685802102
ADVB:  (23179,) (35055,) 0.6612180858650691
ADV2:  0.2785743804026046 0.4248337832292536 0.49490338273093

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0014   0.0054   8.4490   2.3854   3.1146
ADVA:  (19266,) (35071,) 0.54934276182601
ADV1:  8.5505452623591e-05 -0.0006445267791374592 0.009111737795334134 0.046900884772661355 -0.055442908902963
ADVB:  (20109,) (35071,) 0.5733797154344045
ADV2:  0.13517623134188106 0.3676899493260611 0.4967839734630106 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4860   0.6697   3.0746  82.6183  56.9741  26.3264
***** Episode 41159, Mean R = -18.4  Std R = 4.3  Min R = -25.2
PolicyLoss: 2.11
Policy_Entropy: 0.196
Policy_KL: 0.00686
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 1.48e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00366


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0011   0.0043   8.4490   2.3854   3.1146
ADVA:  (19585,) (35166,) 0.556929989194108
ADV1:  0.0015367435738270104 0.0008208648765243002 0.009235600142089502 0.1178748207489711 -0.08907683959895074
ADVB:  (2248

***** Episode 41376, Mean R = -19.6  Std R = 6.6  Min R = -36.5
PolicyLoss: 2.11
Policy_Entropy: 0.195
Policy_KL: 0.00559
Policy_SD: 0.549
Steps: 1.16e+04
TotalSteps: 1.49e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.00431


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0032   8.4490   2.3854   3.1146
ADVA:  (21152,) (35081,) 0.6029474644394401
ADV1:  0.0018136399294005016 0.0005243958170137183 0.011170023135786267 0.046990805920191225 -0.06707613982690236
ADVB:  (21028,) (35081,) 0.5994127875488156
ADV2:  0.20214021828369094 0.39394647006863426 0.48386057615343364 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8669   0.3633   1.5293  82.6183  56.9741  26.3264
***** Episode 41407, Mean R = -19.7  Std R = 7.0  Min R = -35.6
PolicyLoss: 2.18
Policy_Entropy: 0.195
Policy_KL: 0.00574
Policy_SD: 0.55
Steps: 1.17e+04
TotalSteps: 1.49e+07
VF_0_ExplainedVarNew: 0.981
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.00389


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4635   0.5945   2.9930  82.6183  56.9741  26.3264
***** Episode 41624, Mean R = -18.7  Std R = 7.7  Min R = -37.8
PolicyLoss: 2.22
Policy_Entropy: 0.195
Policy_KL: 0.0048
Policy_SD: 0.546
Steps: 1.2e+04
TotalSteps: 1.5e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00333


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0007   8.4490   2.3854   3.1146
ADVA:  (20541,) (35350,) 0.5810749646393211
ADV1:  0.0009944443398460182 0.00012563498142083594 0.009226000607200585 0.07680744536967765 -0.08898743581164986
ADVB:  (21102,) (35350,) 0.596944837340877
ADV2:  0.18111697727020057 0.3741165519888783 0.47637635612045853 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0705   0.4863   2.3196  82.6183  56.9741  26.3264
***** Episode 41655, Mean R = -18.2  Std R = 6.1  Min R = -34.4
PolicyLoss: 2.06
Policy_Entropy: 0.196
Policy_KL: 0.00598
Policy_SD: 0.548
Steps: 1.16e+04
TotalSteps

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |    0.10   -0.03 |    0.64    1.94 |   -1.50   -3.13 |    1.48    3.14
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.03 |    0.03    0.02    0.02
w_rewards |   -0.31 |    0.21 |   -0.93 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.26 |    0.30 |    0.00 |    2.53
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.97   -0.97 |    0.98    0.99
cs_angles |  0.0038  0.0016 |  0.0843  0.0837 | -0.9723 -0.9678 |  0.9843  0.9865
optical_flow |  0.0000  0.0002 |  0.0217  0.0219 | -1.3214 -0.8802 |  1.0535  1.1291
v_err    | -0.0107 |  0.0596 | -0.4534 |  0.1127
landing_rewards |    8.94 |    3.08 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -22.74 |    5.28 |  -49.73 |  -14.34
steps    |     380 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6748   0.2377   1.1858  82.6183  56.9741  26.3264
Update Cnt = 1360    ET =   1456.9   Stats:  Mean, Std, Min, Max
r_f      |    2.14   14.52  -17.44 |  176.40  182.45  204.69 | -385.72 -380.21 -387.78 |  384.62  390.47  392.67
v_f      |    0.00   -0.00    0.00 |    0.05    0.05    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.11    0.10
r_i      |  -16.34   55.02  -63.33 |  674.46  677.53  758.70 |-1288.30-1297.73-1306.46 | 1266.23 1375.27 1342.28
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.19 |    0.08 |    0.02 |    0.47
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.25 |    2.26 |    0.01 |   27.03
thrust   |    0.00    0.00    0.00 |    0.67    0.68    0.67 |   -3.39   -3.36   -3.46 |    3.42    3.30    3.38
norm_thrust |    0.91 |    0.73 |    0.00 |    3.46
fuel     |    1.55 |    0.17 |    1.07 |    2.26
rewards  |  -18.05 

ADVA:  (18528,) (35334,) 0.5243674647648158
ADV1:  0.001730946904802598 0.0008376729401863948 0.00883716562135353 0.03821304802646619 -0.10934502999843515
ADVB:  (23688,) (35334,) 0.6704024452368823
ADV2:  0.3298597033942371 0.46657560596549547 0.5201434731130464 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9814   0.7147   3.1276  82.6183  56.9741  26.3264
***** Episode 42461, Mean R = -16.2  Std R = 5.5  Min R = -34.1
PolicyLoss: 2.29
Policy_Entropy: 0.197
Policy_KL: 0.00743
Policy_SD: 0.538
Steps: 1.17e+04
TotalSteps: 1.53e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00348


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0006   0.0026   8.4490   2.3854   3.1146
ADVA:  (20149,) (34954,) 0.5764433255135321
ADV1:  0.0009910052378992 7.12205455603282e-05 0.008684493772955399 0.03821304802646619 -0.0614986433101824
ADVB:  (20888,) (34954,) 0.5975853979515935
ADV2:  0.17884206798490537 0.3629650120080026 0.46595468847770566 3.0 0.

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0015   0.0054   8.4490   2.3854   3.1146
ADVA:  (20983,) (34820,) 0.6026134405514072
ADV1:  0.0019233751546182789 0.0006089767066546042 0.010193681556997606 0.08065295922063598 -0.06150388654620736
ADVB:  (22354,) (34820,) 0.6419873635841471
ADV2:  0.27376477401245697 0.42377176537200156 0.4924761501712144 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7253   0.2003   1.1377  82.6183  56.9741  26.3264
***** Episode 42709, Mean R = -16.5  Std R = 5.8  Min R = -36.6
PolicyLoss: 2.17
Policy_Entropy: 0.197
Policy_KL: 0.00728
Policy_SD: 0.546
Steps: 1.17e+04
TotalSteps: 1.54e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00306


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0007   0.0029   8.4490   2.3854   3.1146
ADVA:  (18833,) (34808,) 0.5410537807400597
ADV1:  0.0019782766987018266 0.001134868034230947 0.009216502561734296 0.08065295922063598 -0.07945650461834886
ADVB: 

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0001   0.0006   8.4490   2.3854   3.1146
ADVA:  (18164,) (34994,) 0.519060410356061
ADV1:  5.7208767117946854e-05 -0.0004303559112792025 0.00831055921526402 0.042115352339197776 -0.08813280684595376
ADVB:  (20342,) (34994,) 0.5812996513688061
ADV2:  0.15323594221175185 0.36498257122820404 0.48572000380091196 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4566   0.5466   2.6433  82.6183  56.9741  26.3264
***** Episode 42957, Mean R = -17.5  Std R = 6.2  Min R = -33.6
PolicyLoss: 2.06
Policy_Entropy: 0.197
Policy_KL: 0.00742
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 1.55e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00293


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0017   8.4490   2.3854   3.1146
ADVA:  (22054,) (35463,) 0.6218876011617742
ADV1:  0.0008114642010328

***** Episode 43174, Mean R = -19.5  Std R = 6.0  Min R = -35.7
PolicyLoss: 0.237
Policy_Entropy: 0.197
Policy_KL: 0.00336
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 1.56e+07
VF_0_ExplainedVarNew: 0.954
VF_0_ExplainedVarOld: 0.935
VF_0_Loss : 0.0113


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0170   0.0106   0.0398   8.4490   2.3854   3.1146
ADVA:  (21677,) (34596,) 0.6265753266273557
ADV1:  0.0 -0.010161053834125083 0.13933253634753914 0.09501571243668205 -4.9598975370358165
ADVB:  (18647,) (34596,) 0.5389929471615216
ADV2:  0.008606685296727637 0.04068251119436289 0.06592790174353955 0.9823166155364416 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.1364   0.0556   0.2965  82.6183  56.9741  26.3264
***** Episode 43205, Mean R = -26.0  Std R = 37.0  Min R = -226.2
PolicyLoss: 0.251
Policy_Entropy: 0.195
Policy_KL: 0.00328
Policy_SD: 0.549
Steps: 1.13e+04
TotalSteps: 1.56e

cs_angles |  0.0036  0.0001 |  0.0851  0.0875 | -0.9303 -0.9429 |  0.9803  0.9986
optical_flow |  0.0000  0.0002 |  0.0214  0.0234 | -0.9836 -0.9737 |  0.9789  1.1351
v_err    | -0.0114 |  0.0629 | -1.0171 |  0.1588
landing_rewards |    9.35 |    2.46 |    0.00 |   10.00
landing_margin |    3.32 |   58.72 |   -0.06 | 1035.55
tracking_rewards |  -23.07 |    7.38 | -112.78 |  -14.54
steps    |     376 |      27 |      92 |     418
***** Episode 43453, Mean R = -14.5  Std R = 3.6  Min R = -24.2
PolicyLoss: 2.34
Policy_Entropy: 0.198
Policy_KL: 0.00839
Policy_SD: 0.542
Steps: 1.16e+04
TotalSteps: 1.57e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00826


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0013   0.0044   8.4490   2.3854   3.1146
ADVA:  (19416,) (35049,) 0.5539673029187708
ADV1:  0.0016348763316561925 0.0008361796263274776 0.010700297749446284 0

attitude |   -0.21    0.03    0.04 |    1.17    0.65    1.81 |   -3.14   -1.56   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.07   -0.05 |    0.08    0.05    0.07
a_f      |    0.02   -0.00 |    0.65    1.81 |   -1.48   -3.12 |    1.43    3.13
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.02    0.02
w_rewards |   -0.30 |    0.20 |   -1.22 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.29 |    0.00 |    2.83
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.95   -0.99 |    1.00    0.99
cs_angles |  0.0032  0.0046 |  0.0794  0.0807 | -0.9508 -0.9890 |  0.9978  0.9945
optical_flow |  0.0001  0.0001 |  0.0214  0.0228 | -0.7822 -0.9991 |  1.0771  1.1355
v_err    | -0.0112 |  0.0600 | -0.4521 |  0.1237
landing_rewards |    8.97 |    3.04 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.3937   1.0421   4.6789 139.5036  58.0549  47.4570
Update Cnt = 1420    ET =   1787.0   Stats:  Mean, Std, Min, Max
r_f      |    4.25   15.76    2.83 |  189.98  171.01  195.50 | -391.79 -368.90 -385.84 |  387.94  390.33  370.17
v_f      |   -0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.10    0.09    0.10
r_i      |    7.26   39.68   29.84 |  704.88  653.12  755.28 |-1357.60-1323.69-1298.91 | 1363.42 1301.27 1336.12
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.19 |    0.08 |    0.04 |    0.48
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.32 |    2.31 |    0.01 |   30.15
thrust   |    0.00   -0.00    0.00 |    0.67    0.67    0.67 |   -3.43   -3.35   -3.40 |    3.46    3.45    3.42
norm_thrust |    0.91 |    0.73 |    0.00 |    3.46
fuel     |    1.55 |    0.18 |    1.15 |    2.14
rewards  |  -17.92 

ADVA:  (19405,) (35122,) 0.5525027048573543
ADV1:  0.0005144670073569096 -8.789417392759373e-05 0.010629217816986114 0.22667742628505663 -0.09271084028323201
ADVB:  (20800,) (35122,) 0.5922213996925004
ADV2:  0.16861785234655358 0.36276540468974283 0.4795877437172088 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3033   0.4464   2.3851 139.5036  58.0549  47.4570
***** Episode 44321, Mean R = -19.3  Std R = 6.5  Min R = -37.3
PolicyLoss: 2
Policy_Entropy: 0.198
Policy_KL: 0.00639
Policy_SD: 0.553
Steps: 1.18e+04
TotalSteps: 1.6e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00767


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0005   0.0021   8.4490   2.3854   3.1146
ADVA:  (22398,) (35030,) 0.6393948044533257
ADV1:  0.00024799095150926896 -0.0009192419901806454 0.011583242193467963 0.22667742628505663 -0.09271084028323201
ADVB:  (18488,) (35030,) 0.5277761918355696
ADV2:  0.0454013269035308 0.27011906292818244 0.4102436565246079 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0035   8.4490   2.3854   3.1146
ADVA:  (19038,) (35223,) 0.5404991056979814
ADV1:  0.0001414187101554847 -0.00017185785599930015 0.008499510632993163 0.06212315846660765 -0.0818020240811893
ADVB:  (19361,) (35223,) 0.5496692502058315
ADV2:  0.11735759404389343 0.3810240609783116 0.5188816850666064 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.8579   0.6903   3.2184 139.5036  58.0549  47.4570
***** Episode 44569, Mean R = -17.2  Std R = 5.1  Min R = -28.7
PolicyLoss: 2.26
Policy_Entropy: 0.198
Policy_KL: 0.00711
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 1.61e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00587


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0005   0.0017   8.4490   2.3854   3.1146
ADVA:  (22265,) (35174,) 0.6329959629271621
ADV1:  0.0 -0.0011317288200391649 0.009396138473932949 0.045402623468871084 -0.0818020240811893
ADVB:  (16533,) (35174

***** Episode 44786, Mean R = -19.2  Std R = 6.3  Min R = -39.7
PolicyLoss: 2.06
Policy_Entropy: 0.198
Policy_KL: 0.00561
Policy_SD: 0.551
Steps: 1.17e+04
TotalSteps: 1.62e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.0059


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0018   0.0068   8.4490   2.3854   3.1146
ADVA:  (19756,) (35167,) 0.5617766656240225
ADV1:  0.0016854527832734327 0.00026263666933233376 0.010301998051210588 0.05252606678425997 -0.08238725515593748
ADVB:  (22897,) (35167,) 0.6510933545653596
ADV2:  0.288336834651624 0.4244447135651937 0.49099068914670496 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3773   0.5176   2.3329 139.5036  58.0549  47.4570
***** Episode 44817, Mean R = -18.2  Std R = 5.0  Min R = -29.5
PolicyLoss: 2.13
Policy_Entropy: 0.199
Policy_KL: 0.00491
Policy_SD: 0.551
Steps: 1.17e+04
TotalSteps: 1.62e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00527


ValFun  Gradie

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9464   0.3216   1.6636 139.5036  58.0549  47.4570
***** Episode 45034, Mean R = -17.7  Std R = 5.9  Min R = -30.7
PolicyLoss: 1.81
Policy_Entropy: 0.197
Policy_KL: 0.00702
Policy_SD: 0.553
Steps: 1.18e+04
TotalSteps: 1.63e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00557


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0013   8.4490   2.3854   3.1146
ADVA:  (20960,) (35391,) 0.5922409652171456
ADV1:  0.0009366607956065942 -3.592440571574166e-05 0.009667562328892517 0.06490136798108193 -0.0881328279083331
ADVB:  (21611,) (35391,) 0.6106354722952163
ADV2:  0.18132089274952168 0.35279889222971167 0.458230710343482 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7541   0.2826   1.2209 139.5036  58.0549  47.4570
***** Episode 45065, Mean R = -18.5  Std R = 6.1  Min R = -35.2
Policy

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |   -0.00   -0.11 |    0.65    1.91 |   -1.51   -3.12 |    1.50    3.11
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.02 |    0.03    0.02    0.02
w_rewards |   -0.32 |    0.20 |   -0.94 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.29 |    0.00 |    2.77
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.99 |    0.99    0.99
cs_angles |  0.0039  0.0045 |  0.0848  0.0807 | -0.9885 -0.9905 |  0.9896  0.9932
optical_flow |  0.0000  0.0001 |  0.0210  0.0223 | -1.0571 -1.1375 |  0.9547  1.0470
v_err    | -0.0110 |  0.0594 | -0.4527 |  0.1240
landing_rewards |    9.35 |    2.46 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -22.21 |    4.84 |  -42.09 |  -14.70
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.7155   4.7820  17.0085 139.5036  58.0549  47.4570
Update Cnt = 1470    ET =   1339.0   Stats:  Mean, Std, Min, Max
r_f      |  -20.49    8.03    1.81 |  193.41  172.37  195.54 | -391.50 -360.40 -396.57 |  382.17  384.15  388.74
v_f      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.09    0.12
r_i      |  -55.94   30.71   21.99 |  716.39  640.55  754.95 |-1336.49-1284.25-1289.59 | 1372.69 1321.13 1355.84
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.10    0.09    0.09
norm_rf  |    0.18 |    0.07 |    0.03 |    0.47
norm_vf  |    0.08 |    0.01 |    0.03 |    0.12
gs_f     |    1.17 |    1.78 |    0.01 |   22.33
thrust   |    0.01    0.00   -0.01 |    0.67    0.69    0.68 |   -3.37   -3.46   -3.41 |    3.42    3.44    3.46
norm_thrust |    0.92 |    0.74 |    0.00 |    3.46
fuel     |    1.59 |    0.17 |    1.21 |    2.12
rewards  |  -18.14 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9888   0.8236   4.3127 139.5036  58.0549  47.4570
***** Episode 45871, Mean R = -18.5  Std R = 5.6  Min R = -32.4
PolicyLoss: 1.93
Policy_Entropy: 0.2
Policy_KL: 0.0105
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 1.66e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.0046


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0009   0.0038   8.4490   2.3854   3.1146
ADVA:  (18435,) (35109,) 0.5250790395625053
ADV1:  0.0 -0.00045527921397327465 0.009211609297258366 0.08528745912052516 -0.08993089812255334
ADVB:  (20149,) (35109,) 0.57389843060184
ADV2:  0.12867157176838515 0.3560627369549215 0.4989317433454719 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4167   0.5629   2.7284 139.5036  58.0549  47.4570
***** Episode 45902, Mean R = -18.5  Std R = 6.1  Min R = -37.1
PolicyLoss: 2.01
Policy_Entropy: 0.2
Policy_KL: 0.0101
Policy_SD: 0.546
Steps: 1.17e+04
TotalSteps: 1.66e+07
VF_0_Explaine

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.3523   1.1440   4.7811 139.5036  58.0549  47.4570
***** Episode 46119, Mean R = -16.5  Std R = 4.9  Min R = -28.6
PolicyLoss: 2.24
Policy_Entropy: 0.2
Policy_KL: 0.00914
Policy_SD: 0.531
Steps: 1.18e+04
TotalSteps: 1.67e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00401


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0004   0.0016   8.4490   2.3854   3.1146
ADVA:  (18285,) (35329,) 0.5175634747657731
ADV1:  0.0013764499487813782 0.0011152906130430574 0.007702716844345495 0.07854063152325708 -0.06547338504734752
ADVB:  (23132,) (35329,) 0.6547595459820544
ADV2:  0.3037797526791204 0.4825351888025339 0.573194099523 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.2325   1.1985   3.8770 139.5036  58.0549  47.4570
***** Episode 46150, Mean R = -18.3  Std R = 5.3  Min R = -28.4
PolicyLoss: 2.39
Policy_Entropy: 0.2
Policy_KL: 0.00768
Policy_SD: 0.538
Steps: 1.16e+04
TotalSteps: 1.67e+

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  13.5147   8.7514  39.2052 139.5036  58.0549  47.4570
***** Episode 46367, Mean R = -18.1  Std R = 5.7  Min R = -30.9
PolicyLoss: 1.62
Policy_Entropy: 0.2
Policy_KL: 0.0122
Policy_SD: 0.541
Steps: 1.19e+04
TotalSteps: 1.68e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00427


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0033   8.4490   2.3854   3.1146
ADVA:  (18549,) (35596,) 0.5210978761658613
ADV1:  -0.0008157313247487107 -0.002390475171908532 0.00980120267916958 0.09158281556829512 -0.0937139178401778
ADVB:  (16123,) (35596,) 0.4529441510282054
ADV2:  0.0 0.24496824233917616 0.4386072805769997 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.9589   1.7465   7.4731 139.5036  58.0549  47.4570
***** Episode 46398, Mean R = -19.2  Std R = 5.6  Min R = -33.2
PolicyLoss: 1.75
Policy_E

ADVA:  (20838,) (35392,) 0.5887771247739603
ADV1:  0.0010070944280495984 0.00020877250218731915 0.008612698711266019 0.04501791134216021 -0.05790398445210697
ADVB:  (21113,) (35392,) 0.5965472423146474
ADV2:  0.16983356472926767 0.3598064954686818 0.4701801383386663 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5882   0.2093   1.1212 139.5036  58.0549  47.4570
***** Episode 46615, Mean R = -18.4  Std R = 6.3  Min R = -33.8
PolicyLoss: 1.93
Policy_Entropy: 0.203
Policy_KL: 0.00663
Policy_SD: 0.535
Steps: 1.19e+04
TotalSteps: 1.69e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00442


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0031   8.4490   2.3854   3.1146
ADVA:  (19911,) (35124,) 0.5668773488213188
ADV1:  0.0011001579946979204 0.00064330574881683 0.008384055411079089 0.07218878674305773 -0.06043006479388763
ADVB:  (20972,) (

cs_angles |  0.0057  0.0066 |  0.0829  0.0795 | -0.9682 -0.9952 |  0.9979  0.9873
optical_flow | -0.0000  0.0001 |  0.0222  0.0221 | -1.1385 -1.1174 |  1.0648  1.0828
v_err    | -0.0116 |  0.0604 | -0.4526 |  0.1258
landing_rewards |    9.26 |    2.62 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -22.10 |    5.17 |  -49.23 |  -14.19
steps    |     378 |      21 |     332 |     419
***** Episode 46863, Mean R = -18.7  Std R = 6.1  Min R = -34.6
PolicyLoss: 1.86
Policy_Entropy: 0.203
Policy_KL: 0.00697
Policy_SD: 0.53
Steps: 1.17e+04
TotalSteps: 1.7e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.0038


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0003   0.0009   8.4490   2.3854   3.1146
ADVA:  (19486,) (35288,) 0.5521990478349581
ADV1:  0.00024411584255535322 -0.0005036880166759882 0.010847110871301 0.13481221407578803 -0.1776597755610957
ADVB:  (20509,) (35288,) 0.5811890727726139
ADV2:  0.14187

attitude |    0.10   -0.03    0.03 |    1.23    0.68    1.86 |   -3.14   -1.55   -3.14 |    3.14    1.55    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.09   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |   -0.03    0.05 |    0.68    1.86 |   -1.44   -3.12 |    1.39    3.12
w_f      |    0.01    0.00   -0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.03 |    0.04    0.02    0.02
w_rewards |   -0.24 |    0.20 |   -1.86 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.30 |    0.00 |    2.64
seeker_angles |    0.01    0.00 |    0.08    0.08 |   -0.91   -0.96 |    0.99    0.97
cs_angles |  0.0096  0.0004 |  0.0812  0.0837 | -0.9063 -0.9617 |  0.9877  0.9748
optical_flow | -0.0001  0.0001 |  0.0196  0.0218 | -0.8309 -1.1235 |  0.8882  1.0999
v_err    | -0.0114 |  0.0597 | -0.4515 |  0.1073
landing_rewards |    9.35 |    2.46 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7065   0.2618   1.1583 139.5036  58.0549  47.4570
Update Cnt = 1530    ET =   1419.0   Stats:  Mean, Std, Min, Max
r_f      |    1.14    6.13   -1.20 |  183.29  178.51  200.30 | -393.95 -394.01 -365.83 |  388.63  369.32  391.18
v_f      |   -0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.09    0.11    0.11
r_i      |   18.41   32.00   -9.92 |  665.19  677.54  775.63 |-1257.45-1347.35-1309.15 | 1308.55 1314.64 1259.19
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.10    0.10
norm_rf  |    0.17 |    0.07 |    0.02 |    0.46
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.62 |    3.41 |    0.01 |   40.44
thrust   |   -0.00    0.00   -0.01 |    0.67    0.67    0.67 |   -3.45   -3.45   -3.46 |    3.43    3.44    3.46
norm_thrust |    0.91 |    0.73 |    0.00 |    3.46
fuel     |    1.56 |    0.17 |    1.19 |    2.24
rewards  |  -17.85 

ADVA:  (19814,) (35082,) 0.5647910609429337
ADV1:  0.0010293840660036508 9.335585246499987e-05 0.008733450514675037 0.042089531471707686 -0.0693535708312556
ADVB:  (21233,) (35082,) 0.605239153982099
ADV2:  0.1917902538359162 0.3695899927994715 0.4688116358390668 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8240   0.2772   1.2848 139.5036  58.0549  47.4570
***** Episode 47731, Mean R = -16.1  Std R = 4.6  Min R = -27.1
PolicyLoss: 1.98
Policy_Entropy: 0.202
Policy_KL: 0.00756
Policy_SD: 0.541
Steps: 1.15e+04
TotalSteps: 1.73e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00409


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0030   8.4490   2.3854   3.1146
ADVA:  (20769,) (34729,) 0.5980304644533387
ADV1:  0.000998577324058612 -0.0004214332537920621 0.009918215989772603 0.042089531471707686 -0.08984105603343973
ADVB:  (20505,) (34729,) 0.5904287483083303
ADV2:  0.15033880714633055 0.32665660622491355 0.42014912516280933

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0008   8.4490   2.3854   3.1146
ADVA:  (18789,) (35163,) 0.5343400733725792
ADV1:  0.00016449531544419048 0.00014105247909269072 0.007614258481401426 0.11130585983003571 -0.08470576965618642
ADVB:  (19494,) (35163,) 0.5543895572050166
ADV2:  0.10529201879297191 0.3702001024813891 0.5329974345840045 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9145   0.8645   4.2591 139.5036  58.0549  47.4570
***** Episode 47979, Mean R = -18.1  Std R = 5.9  Min R = -35.7
PolicyLoss: 2.15
Policy_Entropy: 0.202
Policy_KL: 0.00894
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 1.74e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.0046


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0017   8.4490   2.3854   3.1146
ADVA:  (20356,) (34908,) 0.5831328062335281
ADV1:  0.00114151853557196

***** Episode 48196, Mean R = -16.8  Std R = 6.0  Min R = -36.5
PolicyLoss: 2
Policy_Entropy: 0.202
Policy_KL: 0.00662
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 1.75e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.00465


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0029   8.4490   2.3854   3.1146
ADVA:  (20753,) (35408,) 0.5861104835065521
ADV1:  0.0004330544697897388 -0.00019576841886450946 0.008586094394600199 0.06704935918312843 -0.10355449858596866
ADVB:  (19111,) (35408,) 0.5397367826479892
ADV2:  0.06510378933979308 0.30379547609189156 0.447914900737298 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3269   0.5907   2.6281 139.5036  58.0549  47.4570
***** Episode 48227, Mean R = -17.9  Std R = 5.2  Min R = -27.7
PolicyLoss: 1.82
Policy_Entropy: 0.201
Policy_KL: 0.00667
Policy_SD: 0.537
Steps: 1.19e+04
TotalSteps: 1.75e+0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.2196   2.5111   8.5213 139.5036  58.0549  47.4570
***** Episode 48444, Mean R = -16.6  Std R = 4.7  Min R = -28.2
PolicyLoss: 2.36
Policy_Entropy: 0.202
Policy_KL: 0.00883
Policy_SD: 0.548
Steps: 1.18e+04
TotalSteps: 1.76e+07
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00406


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0019   0.0067   8.4490   2.3854   3.1146
ADVA:  (20266,) (35264,) 0.5746937386569873
ADV1:  0.0001493960726593052 -0.0004485768417841754 0.008101084186398782 0.04365004064384126 -0.06612452508646322
ADVB:  (17186,) (35264,) 0.48735254083484575
ADV2:  0.0 0.2579173584980589 0.4141296789325678 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3251   0.5450   2.4912 139.5036  58.0549  47.4570
***** Episode 48475, Mean R = -17.1  Std R = 6.3  Min R = -41.1
PolicyLoss: 1.69
Policy_Entropy: 0.202
Policy_KL: 0.00769
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 1.76e+07
VF

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |   -0.01   -0.01 |    0.69    1.80 |   -1.55   -3.13 |    1.52    3.14
w_f      |    0.01    0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.02    0.02
w_rewards |   -0.21 |    0.17 |   -1.11 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.29 |    0.00 |    3.04
seeker_angles |    0.01    0.00 |    0.08    0.08 |   -0.99   -0.98 |    0.98    0.97
cs_angles |  0.0056  0.0025 |  0.0817  0.0796 | -0.9891 -0.9800 |  0.9839  0.9731
optical_flow | -0.0000  0.0001 |  0.0198  0.0225 | -0.8360 -0.9498 |  1.1099  1.1902
v_err    | -0.0113 |  0.0598 | -0.4525 |  0.1300
landing_rewards |    9.29 |    2.57 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -21.85 |    4.32 |  -41.12 |  -13.42
steps    |     380 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9894   0.3718   1.8544 139.5036  58.0549  47.4570
Update Cnt = 1580    ET =   1366.4   Stats:  Mean, Std, Min, Max
r_f      |   11.55   -5.07   16.32 |  186.47  174.73  191.64 | -392.44 -393.16 -383.31 |  362.83  390.61  389.06
v_f      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.12 |    0.10    0.09    0.11
r_i      |   56.69  -28.15   41.29 |  695.58  659.16  746.08 |-1298.84-1369.43-1314.92 | 1225.49 1317.88 1321.21
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.17 |    0.07 |    0.02 |    0.35
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.41 |    2.81 |    0.00 |   24.93
thrust   |    0.00   -0.00    0.00 |    0.67    0.67    0.67 |   -3.46   -3.42   -3.46 |    3.45    3.40    3.42
norm_thrust |    0.89 |    0.73 |    0.00 |    3.46
fuel     |    1.55 |    0.18 |    1.15 |    2.23
rewards  |  -17.34 

ADVA:  (19494,) (34845,) 0.5594489883770986
ADV1:  0.0005900513380192127 -8.136941317030382e-05 0.008215326435904186 0.055039346667209055 -0.08137731551526883
ADVB:  (20340,) (34845,) 0.5837279380111924
ADV2:  0.15511962107987157 0.35271703552272526 0.46601175446926973 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6826   0.2355   1.2682 139.5036  58.0549  47.4570
***** Episode 49281, Mean R = -16.9  Std R = 5.8  Min R = -33.9
PolicyLoss: 1.93
Policy_Entropy: 0.204
Policy_KL: 0.00679
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 1.79e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.00386


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0023   0.0083   8.4490   2.3854   3.1146
ADVA:  (20770,) (34982,) 0.5937339203018696
ADV1:  0.0 -0.0013643429114362075 0.008977729991796888 0.055039346667209055 -0.08137731551526883
ADVB:  (16927,) (34982,) 0.48387742267451833
ADV2:  0.0 0.23963752397495794 0.3856970305458861 3.0 0.0
Policy  Gradients:

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0005   0.0019   8.4490   2.3854   3.1146
ADVA:  (21302,) (35023,) 0.6082288781657768
ADV1:  0.0011405874861550765 0.0007188665682352044 0.007528441677929294 0.04948245635604398 -0.06392349287024007
ADVB:  (20186,) (35023,) 0.5763641035890701
ADV2:  0.16121292809174168 0.3890434050551409 0.5123193359803541 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6921   0.2292   1.1244 139.5036  58.0549  47.4570
***** Episode 49529, Mean R = -17.0  Std R = 6.7  Min R = -41.0
PolicyLoss: 2.17
Policy_Entropy: 0.204
Policy_KL: 0.0056
Policy_SD: 0.533
Steps: 1.17e+04
TotalSteps: 1.8e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.0044


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0023   8.4490   2.3854   3.1146
ADVA:  (19569,) (35152,) 0.5566966317705963
ADV1:  0.00033328946215891754

***** Episode 49746, Mean R = -17.0  Std R = 6.1  Min R = -29.9
PolicyLoss: 2.08
Policy_Entropy: 0.204
Policy_KL: 0.00599
Policy_SD: 0.535
Steps: 1.17e+04
TotalSteps: 1.81e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00427


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0012   8.4490   2.3854   3.1146
ADVA:  (19153,) (35159,) 0.5447538325890953
ADV1:  0.0010651363752565736 0.0006807683224496707 0.007296625675402074 0.05302559197489132 -0.059841221701089645
ADVB:  (21789,) (35159,) 0.6197275235359367
ADV2:  0.23471985591030056 0.44011831229156906 0.542593011406974 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2626   0.5641   2.4536 139.5036  58.0549  47.4570
***** Episode 49777, Mean R = -17.4  Std R = 5.4  Min R = -30.8
PolicyLoss: 2.27
Policy_Entropy: 0.205
Policy_KL: 0.00474
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 1.81e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.0048


Dynamics: Ma

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0602   0.3690   1.8874 139.5036  58.0549  47.4570
***** Episode 49994, Mean R = -18.1  Std R = 5.2  Min R = -34.1
PolicyLoss: 2.08
Policy_Entropy: 0.205
Policy_KL: 0.00796
Policy_SD: 0.54
Steps: 1.18e+04
TotalSteps: 1.82e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.004


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0016   8.4490   2.3854   3.1146
ADVA:  (19919,) (35075,) 0.5678973627940128
ADV1:  0.0 -0.0008345231355234362 0.008292274278960466 0.0742647544906273 -0.10513973169759372
ADVB:  (17511,) (35075,) 0.4992444761225944
ADV2:  0.0 0.30722676660415976 0.4562979386356439 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5269   0.5601   2.5357 139.5036  58.0549  47.4570
***** Episode 50025, Mean R = -18.7  Std R = 8.0  Min R = -46.9
PolicyLoss: 1.97
Policy_Entropy: 0.205
Policy_KL: 0.00864
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 1.82e+07
VF_0_ExplainedVarNew: 0

seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.95 |    0.99    0.99
cs_angles |  0.0039  0.0017 |  0.0820  0.0792 | -0.9906 -0.9497 |  0.9855  0.9929
optical_flow |  0.0000  0.0001 |  0.0202  0.0223 | -1.4227 -1.0092 |  1.0165  1.1213
v_err    | -0.0112 |  0.0596 | -0.4525 |  0.1308
landing_rewards |    9.65 |    1.85 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -21.98 |    4.85 |  -49.54 |  -14.18
steps    |     379 |      20 |     334 |     418
***** Episode 50273, Mean R = -15.9  Std R = 4.0  Min R = -27.8
PolicyLoss: 2.22
Policy_Entropy: 0.205
Policy_KL: 0.00493
Policy_SD: 0.53
Steps: 1.17e+04
TotalSteps: 1.83e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00581


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0048   8.4490   2.3854   3.1146
ADVA:  (20460,) (35127,) 0.5824579383380306
ADV1:  0.0003988106237270868 -2.882246984683223e-05 0.007522745944720624 0.046341

attitude |   -0.06   -0.01   -0.02 |    1.26    0.67    1.83 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |   -0.02   -0.02 |    0.67    1.82 |   -1.40   -3.14 |    1.54    3.10
w_f      |    0.01    0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.02 |    0.04    0.02    0.01
w_rewards |   -0.23 |    0.17 |   -1.12 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.30 |    0.00 |    2.75
seeker_angles |    0.01    0.01 |    0.08    0.09 |   -0.97   -1.00 |    1.00    1.00
cs_angles |  0.0059  0.0052 |  0.0804  0.0855 | -0.9714 -0.9956 |  0.9997  0.9996
optical_flow |  0.0001 -0.0000 |  0.0209  0.0221 | -0.8449 -1.0434 |  1.1089  1.3731
v_err    | -0.0113 |  0.0602 | -0.4554 |  0.1283
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (17781,) (35225,) 0.5047835344215755
ADV1:  0.0 -0.0004815085623179944 0.007964247780154381 0.048728557617527146 -0.09373408921397763
ADVB:  (20080,) (35225,) 0.5700496806245564
ADV2:  0.13589028998074598 0.3812699891678496 0.5115353258716765 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6910   0.2692   1.3715 139.5036  58.0549  47.4570
Update Cnt = 1640    ET =   1390.5   Stats:  Mean, Std, Min, Max
r_f      |    0.46  -10.16    8.80 |  183.87  174.68  194.95 | -391.44 -378.22 -391.14 |  394.34  394.88  395.02
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.09    0.10    0.10
r_i      |   -4.39  -69.75   34.19 |  689.19  654.22  758.03 |-1306.93-1337.07-1231.35 | 1342.18 1266.72 1385.38
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.17 |    0.07 |    0.03 |    0.48
norm_vf  |    0.07 |    0.01 |    0.04 |    0.11
gs_f     |    1.31 |    

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0019   0.0068   8.4490   2.3854   3.1146
ADVA:  (20876,) (35079,) 0.5951138858006214
ADV1:  0.0 -0.0010987712239461788 0.00988800709234573 0.05292396440580638 -0.07137461651319454
ADVB:  (17856,) (35079,) 0.5090224920892842
ADV2:  0.02039739939500344 0.34244076847953475 0.4957549916335984 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7161   0.7269   3.3926 139.5036  58.0549  47.4570
***** Episode 51141, Mean R = -18.9  Std R = 6.1  Min R = -35.1
PolicyLoss: 2.16
Policy_Entropy: 0.205
Policy_KL: 0.012
Policy_SD: 0.539
Steps: 1.16e+04
TotalSteps: 1.86e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00408


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0027   8.4490   2.3854   3.1146
ADVA:  (19849,) (34964,) 0.5676982038668344
ADV1:  0.0 -0.0008458287293229021 0.009418865

***** Episode 51358, Mean R = -16.1  Std R = 4.0  Min R = -28.1
PolicyLoss: 0.599
Policy_Entropy: 0.204
Policy_KL: 0.00441
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 1.87e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.00386


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00136046 0.00142963] 0.0023641748217967627
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0016   0.0059   8.4490   2.3854   3.1146
ADVA:  (18484,) (34996,) 0.5281746485312607
ADV1:  0.0017843461231492183 0.00113152299298298 0.011094953332632823 0.13853027931607487 -0.14626699914284996
ADVB:  (22799,) (34996,) 0.6514744542233398
ADV2:  0.31039350459575477 0.47766289417636104 0.5663474613548422 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  13.0292   8.4798  34.0153 139.5036  58.0549  47.4570
***** Episode 51389, Mean R = -15.9  Std R = 4.3  Min R = -24.1
PolicyLoss: 2.37
Policy_Entropy: 0.205
Policy_KL: 0.00613
Policy_SD: 0.535
Steps: 1.16e+04
TotalSteps: 1.87e+

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2552   0.5627   2.5898 139.5036  58.0549  47.4570
***** Episode 51606, Mean R = -16.4  Std R = 4.5  Min R = -31.3
PolicyLoss: 2.1
Policy_Entropy: 0.204
Policy_KL: 0.00806
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 1.88e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00468


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0016   0.0058   8.4490   2.3854   3.1146
ADVA:  (20647,) (35254,) 0.5856640381233336
ADV1:  6.754159062349885e-05 -0.0004315412706458194 0.008474447105968378 0.060515988868289794 -0.11050849135879026
ADVB:  (18318,) (35254,) 0.5196006126964317
ADV2:  0.03476551413616607 0.3079155878597262 0.4661480271328302 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9111   0.3833   1.7158 139.5036  58.0549  47.4570
***** Episode 51637, Mean R = -18.4  Std R = 6.2  Min R = -34.4
PolicyLoss: 1.9
Policy_Entropy: 0.205
Policy_KL: 0.0084
Policy_SD: 0.542
Steps: 1.18e+04
TotalStep

ADVA:  (20889,) (35399,) 0.5901014152942173
ADV1:  0.0002675632115630112 -0.0007761011487254133 0.009343411468057543 0.07047432263058673 -0.06991893195141674
ADVB:  (20002,) (35399,) 0.5650442102884262
ADV2:  0.10712938357010782 0.3115396257605536 0.43708903064080484 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9243   0.6297   3.2181 139.5036  79.4226  47.4570
***** Episode 51854, Mean R = -17.6  Std R = 4.9  Min R = -27.0
PolicyLoss: 1.77
Policy_Entropy: 0.205
Policy_KL: 0.0163
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 1.89e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.0047


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0012   0.0046   8.4490   2.3854   3.1146
ADVA:  (19731,) (35293,) 0.5590627036522823
ADV1:  0.0010951503312238275 0.000411615379538724 0.008391514363500208 0.07047432263058673 -0.09448146253408163
ADVB:  (22269,) (35293,) 0.6309749808743944
ADV2:  0.227268928328106 0.4018425504757087 0.49974637128942684 3.

attitude |   -0.12    0.01    0.15 |    1.27    0.70    1.90 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |    0.02    0.08 |    0.68    1.91 |   -1.54   -3.13 |    1.48    3.12
w_f      |    0.01    0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.04    0.02    0.02
w_rewards |   -0.22 |    0.16 |   -1.08 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.30 |    0.00 |    2.52
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.95   -0.97 |    0.96    0.99
cs_angles |  0.0009  0.0029 |  0.0836  0.0817 | -0.9492 -0.9736 |  0.9642  0.9919
optical_flow |  0.0001  0.0000 |  0.0202  0.0203 | -1.0382 -1.2692 |  1.0024  1.0694
v_err    | -0.0111 |  0.0601 | -0.4529 |  0.0983
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.3352   0.1185   0.6387 139.5036  79.4226  47.4570
Update Cnt = 1690    ET =   1294.3   Stats:  Mean, Std, Min, Max
r_f      |  -15.29   -8.99   10.56 |  188.82  162.08  212.42 | -387.52 -635.87 -390.04 |  855.95  377.27  933.03
v_f      |    0.01    0.00    0.00 |    0.06    0.06    0.10 |   -0.10   -0.10   -0.09 |    0.80    0.73    1.47
r_i      |  -58.93  -27.14   23.31 |  672.45  630.35  789.13 |-1341.52-1331.46-1341.73 | 1286.98 1358.87 1333.22
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.09    0.10    0.09
norm_rf  |    3.60 |   60.24 |    0.01 | 1062.61
norm_vf  |    0.08 |    0.10 |    0.04 |    1.83
gs_f     |    1.49 |    2.46 |    0.00 |   25.52
thrust   |    0.00    0.00   -0.00 |    0.66    0.66    0.67 |   -3.43   -3.38   -3.45 |    3.45    3.46    3.43
norm_thrust |    0.88 |    0.73 |    0.00 |    3.46
fuel     |    1.53 |    0.18 |    1.09 |    2.10
rewards  |  -17.31 

ADVA:  (20117,) (35349,) 0.5690967212650995
ADV1:  0.0 -0.0014877280337904386 0.009473340438982927 0.0710342280087714 -0.0720937056937474
ADVB:  (17539,) (35349,) 0.496166793968712
ADV2:  0.0 0.26180018795733095 0.43390514768525523 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.8683   2.2447   9.2420 139.5036  79.4226  47.4570
***** Episode 52691, Mean R = -16.9  Std R = 6.2  Min R = -35.6
PolicyLoss: 1.68
Policy_Entropy: 0.207
Policy_KL: 0.00837
Policy_SD: 0.53
Steps: 1.18e+04
TotalSteps: 1.92e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00437


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0022   8.4490   2.3854   3.1146
ADVA:  (21484,) (35408,) 0.6067555354722096
ADV1:  0.0 -0.0009246130721976382 0.009330749901814227 0.0710342280087714 -0.08500638560452012
ADVB:  (16931,) (35408,) 0.47816877541798464
ADV2:  0.0 0.26618152460

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0034   8.4490   2.3854   3.1146
ADVA:  (20847,) (35084,) 0.5942024854634591
ADV1:  0.00041326949283288644 -0.0005164871158720501 0.009045734768556773 0.07067788433519917 -0.06859763748647002
ADVB:  (19759,) (35084,) 0.5631911982670164
ADV2:  0.10953203863639713 0.31898128133584674 0.4345129068306706 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8799   0.3686   1.4993 139.5036  79.4226  47.4570
***** Episode 52939, Mean R = -18.3  Std R = 5.9  Min R = -33.4
PolicyLoss: 1.79
Policy_Entropy: 0.208
Policy_KL: 0.00646
Policy_SD: 0.534
Steps: 1.16e+04
TotalSteps: 1.93e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00396


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0050   8.4490   2.3854   3.1146
ADVA:  (18945,) (34713,) 0.5457609541094115
ADV1:  0.0009986084692050184 0.0003714894604781438 0.008249341414316842 0.05565567787066211 -0.07319964775658677
ADV

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0014   0.0060   8.4490   2.3854   3.1146
ADVA:  (20033,) (35658,) 0.5618094116327332
ADV1:  0.00018903526018203796 -0.00015732549421172397 0.007597467343160986 0.07976302300556815 -0.07387519758791083
ADVB:  (19352,) (35658,) 0.5427113130293343
ADV2:  0.08175766884285068 0.341133185229014 0.4952304936795608 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3650   0.5530   2.7645 139.5036  79.4226  47.4570
***** Episode 53187, Mean R = -18.0  Std R = 4.6  Min R = -28.7
PolicyLoss: 1.99
Policy_Entropy: 0.207
Policy_KL: 0.00822
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 1.94e+07
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00376


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0025   8.4490   2.3854   3.1146
ADVA:  (20794,) (35403,) 0.587351354404994
ADV1:  0.0004018449070020604 -0.00019257779662631711 0.008478572468731186 0.05037887453007306 -0.07035021454741597
ADV

***** Episode 53404, Mean R = -17.5  Std R = 4.9  Min R = -27.6
PolicyLoss: 2.04
Policy_Entropy: 0.207
Policy_KL: 0.0118
Policy_SD: 0.533
Steps: 1.19e+04
TotalSteps: 1.95e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00396


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0010   0.0038   8.4490   2.3854   3.1146
ADVA:  (21244,) (35184,) 0.6037971805366076
ADV1:  0.0 -0.0010973835290257255 0.010133123712544925 0.0808558743059126 -0.1527343895835479
ADVB:  (18065,) (35184,) 0.513443610732151
ADV2:  0.02460792900936802 0.29672657445163175 0.44877655628754193 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.8779   1.6391   7.8437 139.5036  79.4226  47.4570
***** Episode 53435, Mean R = -19.3  Std R = 6.2  Min R = -40.2
PolicyLoss: 1.84
Policy_Entropy: 0.207
Policy_KL: 0.0078
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 1.95e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.00428


ValFun  Gradients: u/sd/Max/C Max

cs_angles |  0.0018  0.0044 |  0.0817  0.0769 | -0.9663 -0.9776 |  0.9871  0.9955
optical_flow |  0.0000 -0.0001 |  0.0215  0.0226 | -1.0850 -1.2376 |  1.3714  1.2093
v_err    | -0.0112 |  0.0603 | -0.4525 |  0.1082
landing_rewards |    9.29 |    2.57 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -21.71 |    4.34 |  -36.82 |  -14.37
steps    |     379 |      21 |     338 |     420
***** Episode 53683, Mean R = -15.3  Std R = 3.1  Min R = -22.4
PolicyLoss: 1.94
Policy_Entropy: 0.208
Policy_KL: 0.0079
Policy_SD: 0.521
Steps: 1.19e+04
TotalSteps: 1.96e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00336


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0030   8.4490   2.3854   3.1146
ADVA:  (19699,) (35336,) 0.557476794204211
ADV1:  0.0008627546570336327 -0.00014842766089969602 0.009068712019609208 0.0907437587300064 -0.06768944174459451
ADVB:  (20440,) (35336,) 0.5784469096671949
ADV2:  0.1

attitude |   -0.07    0.04    0.01 |    1.17    0.67    1.80 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |    0.05   -0.02 |    0.67    1.80 |   -1.44   -3.13 |    1.43    3.11
w_f      |    0.01    0.00   -0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.03 |    0.03    0.02    0.02
w_rewards |   -0.20 |    0.15 |   -0.82 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.29 |    0.00 |    2.37
seeker_angles |    0.00    0.01 |    0.08    0.08 |   -0.97   -0.93 |    0.99    0.99
cs_angles |  0.0012  0.0068 |  0.0833  0.0834 | -0.9695 -0.9330 |  0.9902  0.9861
optical_flow | -0.0000 -0.0000 |  0.0205  0.0217 | -0.9274 -1.1931 |  1.0467  1.0988
v_err    | -0.0115 |  0.0609 | -0.4526 |  0.1032
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (14048,) (35190,) 0.399204319408923
ADV1:  0.000682140957133013 0.0009106072522557002 0.009126474080959441 0.2612732535637601 -0.23769599041744183
ADVB:  (23917,) (35190,) 0.6796533105996022
ADV2:  0.4279529258789223 0.6230409234456794 0.6909561451707665 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.3995   0.9082   4.6396 139.5036  79.4226  47.4570
Update Cnt = 1750    ET =   1258.3   Stats:  Mean, Std, Min, Max
r_f      |    2.85    6.57    0.51 |  183.77  178.31  197.15 | -380.08 -391.99 -365.81 |  394.46  380.39  388.30
v_f      |   -0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.10   -0.08   -0.09 |    0.10    0.10    0.10
r_i      |    8.87   44.75   -0.96 |  699.82  672.38  731.29 |-1306.43-1323.13-1339.10 | 1297.93 1303.63 1274.81
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.09    0.09
norm_rf  |    0.17 |    0.08 |    0.01 |    0.42
norm_vf  |    0.08 |    0.01 |    0.04 |    0.11
gs_f     |  

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0016   8.4490   2.3854   3.1146
ADVA:  (20468,) (35230,) 0.5809821175134828
ADV1:  0.00031842653523764773 -0.0003651032274952087 0.00854731729886752 0.07727249616087711 -0.07142497322007699
ADVB:  (19557,) (35230,) 0.5551234743116662
ADV2:  0.08950592311499948 0.3141250861562898 0.44962655973105403 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0859   0.3850   2.0165 139.5036  79.4226  47.4570
***** Episode 54551, Mean R = -17.5  Std R = 6.8  Min R = -36.3
PolicyLoss: 1.79
Policy_Entropy: 0.208
Policy_KL: 0.00594
Policy_SD: 0.529
Steps: 1.19e+04
TotalSteps: 1.99e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00414


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0010   8.4490   2.3854   3.1146
ADVA:  (22368,) (35529,) 0.6295702102507811
ADV1:  0.000742899996195

***** Episode 54768, Mean R = -17.1  Std R = 5.5  Min R = -29.1
PolicyLoss: 2.67
Policy_Entropy: 0.207
Policy_KL: 0.00842
Policy_SD: 0.534
Steps: 1.16e+04
TotalSteps: 2e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00413


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0022   8.4490   2.3854   3.1146
ADVA:  (17836,) (34985,) 0.5098184936401314
ADV1:  0.0 -3.185809549332272e-05 0.006024690562734521 0.055782535391265375 -0.05800193328664177
ADVB:  (19220,) (34985,) 0.5493783049878519
ADV2:  0.1095465096409966 0.39850610390187224 0.5516513665396561 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4745   0.4974   2.5709 139.5036  79.4226  47.4570
***** Episode 54799, Mean R = -14.4  Std R = 3.8  Min R = -27.9
PolicyLoss: 2.29
Policy_Entropy: 0.208
Policy_KL: 0.00766
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 2e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00372


Dynamics: Max Disturbance (m/s^2):

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7350   0.2588   1.3652 139.5036  79.4226  47.4570
***** Episode 55016, Mean R = -18.4  Std R = 6.7  Min R = -40.9
PolicyLoss: 1.62
Policy_Entropy: 0.208
Policy_KL: 0.00839
Policy_SD: 0.538
Steps: 1.19e+04
TotalSteps: 2.01e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.957
VF_0_Loss : 0.00411


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0025   8.4490   2.3854   3.1146
ADVA:  (18958,) (35463,) 0.5345853424696162
ADV1:  0.001756430018797361 0.0013708269803496189 0.01884356256118732 0.3065221399379412 -0.48681032579834105
ADVB:  (24424,) (35463,) 0.6887178185714689
ADV2:  0.25521199810478035 0.3810324626543731 0.4852463645034739 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5245   0.2010   1.0003 139.5036  79.4226  47.4570
***** Episode 55047, Mean R = -16.9  Std R = 4.6  Min R = -30.4
PolicyLoss: 1.77
Policy_Entropy: 0.208
Policy_KL: 0.00815
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps:

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0016   0.0058   8.4490   2.3854   3.1146
ADVA:  (21351,) (34815,) 0.6132701421800948
ADV1:  0.0003125159726218383 -0.00028771426502608615 0.008815214558124932 0.0717901084290008 -0.06633641640633163
ADVB:  (18813,) (34815,) 0.5403705299439897
ADV2:  0.07280588361064438 0.3198587437687341 0.4493107809791578 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.5303   1.0815   5.0696 149.7444 133.1056  53.3563
***** Episode 55264, Mean R = -17.3  Std R = 5.3  Min R = -32.9
PolicyLoss: 1.88
Policy_Entropy: 0.207
Policy_KL: 0.0378
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 2.02e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00456


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0011   0.0041   8.4490   2.3854   3.1146
ADVA:  (21945,) (34879,) 0.6291751483700794
ADV1:  0.0005625714700113124 -0.00029585772892017515 0.00958583771156611 0.0717901084290008 -0.06788196620237652
ADVB: 

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.07   -0.05 |    0.06    0.05    0.07
a_f      |   -0.01   -0.12 |    0.64    1.92 |   -1.31   -3.13 |    1.38    3.14
w_f      |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.03    0.02
w_rewards |   -0.17 |    0.15 |   -0.83 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.29 |    0.00 |    2.58
seeker_angles |    0.00    0.01 |    0.08    0.08 |   -1.00   -0.96 |    0.99    1.00
cs_angles |  0.0011  0.0077 |  0.0807  0.0782 | -0.9986 -0.9593 |  0.9900  0.9965
optical_flow | -0.0000  0.0001 |  0.0207  0.0199 | -1.0916 -1.1477 |  1.0207  0.9184
v_err    | -0.0115 |  0.0611 | -0.4576 |  0.1316
landing_rewards |    9.55 |    2.08 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -21.71 |    4.88 |  -47.63 |  -14.03
steps    |     380 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0999   0.3914   1.8138 149.7444 133.1056  53.3563
Update Cnt = 1800    ET =   1178.5   Stats:  Mean, Std, Min, Max
r_f      |  -23.15  -29.39   -3.67 |  194.35  160.85  199.41 | -376.10 -363.48 -381.27 |  390.24  367.38  376.52
v_f      |    0.00    0.01    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.13    0.11
r_i      |  -51.63  -94.33   11.43 |  710.46  629.46  757.42 |-1288.78-1329.10-1344.33 | 1316.73 1197.42 1303.72
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.08    0.10    0.10
norm_rf  |    0.17 |    0.08 |    0.02 |    0.50
norm_vf  |    0.08 |    0.02 |    0.03 |    0.13
gs_f     |    1.20 |    1.98 |    0.00 |   24.37
thrust   |   -0.00    0.00    0.00 |    0.66    0.66    0.66 |   -3.20   -3.39   -3.31 |    3.46    3.30    3.43
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.53 |    0.17 |    1.12 |    2.06
rewards  |  -17.58 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.8261   0.7912   3.6101 149.7444 133.1056  53.3563
***** Episode 56101, Mean R = -18.2  Std R = 4.9  Min R = -27.3
PolicyLoss: 2.05
Policy_Entropy: 0.208
Policy_KL: 0.00785
Policy_SD: 0.538
Steps: 1.19e+04
TotalSteps: 2.05e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00343


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   8.4490   2.3854   3.1146
ADVA:  (19194,) (35347,) 0.5430163804566158
ADV1:  0.0010096997735739646 0.0006379262635717902 0.007761759628035567 0.06645470270668713 -0.09026528377514231
ADVB:  (22248,) (35347,) 0.6294169236427419
ADV2:  0.24198015413364315 0.44042309272612656 0.540665333409969 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4919   0.5659   2.6205 149.7444 133.1056  53.3563
***** Episode 56132, Mean R = -17.6  Std R = 5.2  Min R = -32.4
PolicyLoss: 2.23
Policy_Entropy: 0.207
Policy_KL: 0.00643
Policy_SD: 0.541
Steps: 1.18e+04
TotalSte

ADVA:  (21296,) (35116,) 0.6064472035539356
ADV1:  0.00010583377868009919 -0.00025330824381358466 0.008927782071677753 0.06225892758933166 -0.08722398107031898
ADVB:  (17739,) (35116,) 0.5051543455974484
ADV2:  0.00949041181754773 0.3106538134919229 0.4868732452441115 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4115   0.4919   2.3505 149.7444 133.1056  53.3563
***** Episode 56349, Mean R = -17.4  Std R = 6.9  Min R = -35.2
PolicyLoss: 1.96
Policy_Entropy: 0.208
Policy_KL: 0.00831
Policy_SD: 0.541
Steps: 1.16e+04
TotalSteps: 2.06e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00325


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0010   0.0037   8.4490   2.3854   3.1146
ADVA:  (20560,) (35433,) 0.5802500493889877
ADV1:  9.098446869977572e-06 -0.0002958835655680125 0.008672571706232882 0.06225892758933166 -0.08722398107031898
ADVB:  (18246,) (35433,) 0.5149436965540598
ADV2:  0.029168664633631426 0.3249186902056771 0.493798090219

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0045   8.4490   2.3854   3.1146
ADVA:  (20181,) (35018,) 0.5763036152835684
ADV1:  0.0009083705541238236 0.0004135395068892661 0.0077930403002599595 0.09958930202954275 -0.06182182375995803
ADVB:  (20527,) (35018,) 0.5861842481009767
ADV2:  0.15618070687043384 0.3792098975000976 0.49049557869191657 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3194   0.5880   2.7694 149.7444 133.1056  53.3563
***** Episode 56597, Mean R = -18.3  Std R = 6.3  Min R = -39.9
PolicyLoss: 2.04
Policy_Entropy: 0.209
Policy_KL: 0.00919
Policy_SD: 0.544
Steps: 1.16e+04
TotalSteps: 2.07e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00313


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0034   0.0022   0.0075   8.4490   2.3854   3.1146
ADVA:  (21826,) (35237,) 0.6194057382864603
ADV1:  0.0 -0.0007349054312116135 0.008526984891271691 0.050572740572661645 -0.06182182375995803
ADVB:  (15874,) (352

***** Episode 56814, Mean R = -16.5  Std R = 3.8  Min R = -23.2
PolicyLoss: 2.53
Policy_Entropy: 0.21
Policy_KL: 0.00831
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps: 2.08e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.0036


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0014   8.4490   2.3854   3.1146
ADVA:  (19846,) (35430,) 0.5601467682754727
ADV1:  0.0 -0.0006876631124439436 0.007969762175146267 0.09851223807368426 -0.07781881421103687
ADVB:  (18406,) (35430,) 0.5195032458368615
ADV2:  0.03425182011680496 0.32114537507048047 0.48063597662096963 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.5621   2.3433  10.4018 149.7444 133.1056  53.3563
***** Episode 56845, Mean R = -17.0  Std R = 4.7  Min R = -29.0
PolicyLoss: 1.95
Policy_Entropy: 0.21
Policy_KL: 0.00988
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 2.08e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00346


ValFun  Gradients: u/sd/Max/C M

seeker_angles |    0.00    0.00 |    0.08    0.08 |   -1.00   -0.96 |    0.96    0.99
cs_angles |  0.0009  0.0049 |  0.0802  0.0794 | -0.9994 -0.9636 |  0.9639  0.9881
optical_flow | -0.0001 -0.0000 |  0.0195  0.0219 | -0.9562 -1.0087 |  1.0038  1.1758
v_err    | -0.0111 |  0.0606 | -0.4663 |  0.1122
landing_rewards |    9.74 |    1.59 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -21.68 |    4.91 |  -57.68 |  -14.12
steps    |     380 |      19 |     339 |     420
***** Episode 57093, Mean R = -16.5  Std R = 4.6  Min R = -26.4
PolicyLoss: 2.29
Policy_Entropy: 0.21
Policy_KL: 0.00931
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 2.09e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00394


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0001   0.0006   8.4490   2.3854   3.1146
ADVA:  (17828,) (35200,) 0.5064772727272727
ADV1:  0.0005362661787796227 5.2507700903983626e-05 0.00777280771822415 0.1398527

attitude |   -0.05    0.01   -0.00 |    1.30    0.70    1.90 |   -3.14   -1.57   -3.14 |    3.14    1.55    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |    0.01    0.02 |    0.70    1.89 |   -1.49   -3.14 |    1.51    3.13
w_f      |    0.01    0.00   -0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.03 |    0.03    0.02    0.02
w_rewards |   -0.21 |    0.16 |   -0.79 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.29 |    0.00 |    2.89
seeker_angles |   -0.00    0.00 |    0.08    0.08 |   -1.00   -0.98 |    1.00    1.00
cs_angles | -0.0002  0.0017 |  0.0815  0.0804 | -0.9990 -0.9822 |  0.9962  0.9998
optical_flow | -0.0000  0.0002 |  0.0191  0.0214 | -0.9315 -0.9775 |  1.1120  0.9668
v_err    | -0.0114 |  0.0609 | -0.4535 |  0.1305
landing_rewards |    9.65 |    1.85 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5724   0.2033   1.0147 149.7444 133.1056  53.3563
Update Cnt = 1860    ET =   1207.9   Stats:  Mean, Std, Min, Max
r_f      |  -11.03  -34.27  -10.67 |  191.03  171.88  185.02 | -382.90 -367.89 -399.18 |  395.59  366.04  382.57
v_f      |    0.00    0.00    0.00 |    0.04    0.05    0.05 |   -0.09   -0.10   -0.09 |    0.10    0.10    0.10
r_i      |  -18.76  -93.85  -45.72 |  686.06  686.69  725.62 |-1327.53-1279.43-1347.76 | 1257.49 1271.65 1280.85
v_i      |    0.00    0.01    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.10    0.09
norm_rf  |    0.16 |    0.07 |    0.03 |    0.39
norm_vf  |    0.08 |    0.02 |    0.03 |    0.11
gs_f     |    1.05 |    1.27 |    0.00 |    8.80
thrust   |    0.00    0.00    0.00 |    0.65    0.67    0.66 |   -3.45   -3.46   -3.42 |    3.46    3.44    3.39
norm_thrust |    0.89 |    0.73 |    0.00 |    3.46
fuel     |    1.54 |    0.17 |    1.16 |    2.44
rewards  |  -17.11 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7483   0.2537   1.2228 149.7444 133.1056  53.3563
***** Episode 57961, Mean R = -16.4  Std R = 5.3  Min R = -27.7
PolicyLoss: 1.91
Policy_Entropy: 0.211
Policy_KL: 0.00887
Policy_SD: 0.538
Steps: 1.16e+04
TotalSteps: 2.12e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00356


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0023   8.4490   2.3854   3.1146
ADVA:  (19924,) (35146,) 0.5668923917373243
ADV1:  0.0010054266395329945 0.0006944532923741006 0.008669683451494612 0.06438891187260432 -0.05353505545832133
ADVB:  (20976,) (35146,) 0.5968246742161271
ADV2:  0.18508494522346358 0.41886729073861795 0.5559112710112561 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6864   0.2942   1.3769 149.7444 133.1056  53.3563
***** Episode 57992, Mean R = -16.1  Std R = 4.8  Min R = -28.4
PolicyLoss: 2.21
Policy_Entropy: 0.211
Policy_KL: 0.00637
Policy_SD: 0.53
Steps: 1.17e+04
TotalSte

ADVA:  (20835,) (35549,) 0.5860924357928493
ADV1:  0.001688374172392676 0.0007875172465948935 0.009208546570384392 0.05949637361323068 -0.08727643819166175
ADVB:  (22744,) (35549,) 0.6397929618273369
ADV2:  0.259264534573846 0.42926348651666646 0.5127101369818585 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9227   0.3580   1.7283 149.7444 133.1056  53.3563
***** Episode 58209, Mean R = -16.8  Std R = 4.5  Min R = -27.2
PolicyLoss: 2.11
Policy_Entropy: 0.212
Policy_KL: 0.00711
Policy_SD: 0.526
Steps: 1.19e+04
TotalSteps: 2.13e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00279


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0052   8.4490   2.3854   3.1146
ADVA:  (20202,) (35469,) 0.5695677915926584
ADV1:  0.0012898325571527773 0.00048488097449929287 0.009084098491327228 0.05360867680082687 -0.09970917602862772
ADVB:  (21784,) (35469,) 0.6141701203868166
ADV2:  0.21917138602186392 0.4096955007111821 0.503506043385485 3.0

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0034   8.4490   2.3854   3.1146
ADVA:  (20280,) (35334,) 0.5739514348785872
ADV1:  0.00034820170041907975 -0.00012141656476260438 0.008405545327391083 0.04571821041785812 -0.0921360414398722
ADVB:  (19817,) (35334,) 0.5608479085300278
ADV2:  0.11408648902548088 0.3556554400788043 0.4750981051849685 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8446   0.2732   1.3627 149.7444 133.1056  53.3563
***** Episode 58457, Mean R = -15.9  Std R = 4.5  Min R = -30.9
PolicyLoss: 2
Policy_Entropy: 0.21
Policy_KL: 0.0097
Policy_SD: 0.533
Steps: 1.18e+04
TotalSteps: 2.14e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00305


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0054   8.4490   2.3854   3.1146
ADVA:  (19166,) (35154,) 0.5452011150935883
ADV1:  0.0004374823685336076 -0.00019286259329829485 0.008899084122956119 0.13381301878959234 -0.0935681512590183
ADVB:  (

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0020   0.0074   8.4490   2.3854   3.1146
ADVA:  (18381,) (35201,) 0.5221726655492742
ADV1:  0.0016879919228084926 0.00046527893325702235 0.009716684145356175 0.06663725103981827 -0.06835333236289987
ADVB:  (23041,) (35201,) 0.654555268316241
ADV2:  0.2686236134722083 0.41789745665443306 0.48296021989535315 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0164   0.4050   1.9878 149.7444 133.1056  53.3563
***** Episode 58705, Mean R = -15.4  Std R = 4.2  Min R = -27.5
PolicyLoss: 2.02
Policy_Entropy: 0.21
Policy_KL: 0.00644
Policy_SD: 0.532
Steps: 1.16e+04
TotalSteps: 2.15e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00302


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0028   0.0017   0.0064   8.4490   2.3854   3.1146
ADVA:  (17432,) (35433,) 0.4919707617193012
ADV1:  0.0011005697775260592 0.0007171022775880692 0.008667007142481638 0.06663725103981827 -0.06540951668956968
ADVB: 

cs_angles | -0.0001  0.0040 |  0.0838  0.0810 | -0.9725 -0.9960 |  0.9535  0.9643
optical_flow | -0.0000  0.0000 |  0.0193  0.0200 | -1.0604 -1.1033 |  1.0409  0.8931
v_err    | -0.0116 |  0.0610 | -0.4535 |  0.1737
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -22.33 |    5.21 |  -44.22 |  -13.62
steps    |     379 |      20 |     334 |     417
***** Episode 58953, Mean R = -15.9  Std R = 4.7  Min R = -28.4
PolicyLoss: 1.99
Policy_Entropy: 0.211
Policy_KL: 0.00645
Policy_SD: 0.535
Steps: 1.15e+04
TotalSteps: 2.16e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00324


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0016   0.0058   8.4490   2.3854   3.1146
ADVA:  (21210,) (34897,) 0.6077886351262286
ADV1:  0.0 -0.0006171262047753868 0.008202245006372582 0.05912278835040308 -0.06790961722525177
ADVB:  (18261,) (34897,) 0.5232828036793994
ADV2:  0.03659375420250849

attitude |    0.02   -0.06    0.01 |    1.19    0.68    1.79 |   -3.14   -1.56   -3.14 |    3.14    1.55    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |   -0.05    0.02 |    0.68    1.79 |   -1.53   -3.14 |    1.44    3.14
w_f      |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.02   -0.03   -0.03 |    0.03    0.03    0.01
w_rewards |   -0.21 |    0.17 |   -0.87 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.30 |    0.00 |    2.46
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.97   -0.99 |    0.97    0.99
cs_angles |  0.0012  0.0011 |  0.0841  0.0807 | -0.9719 -0.9859 |  0.9736  0.9931
optical_flow | -0.0001  0.0000 |  0.0195  0.0217 | -0.9418 -1.2619 |  0.9212  1.0598
v_err    | -0.0116 |  0.0610 | -0.4533 |  0.1044
landing_rewards |    9.29 |    2.57 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1277   0.3290   1.6782 149.7444 133.1056  53.3563
Update Cnt = 1920    ET =   1367.0   Stats:  Mean, Std, Min, Max
r_f      |  -10.15    4.96  -11.77 |  185.38  174.18  197.37 | -395.33 -392.77 -393.07 |  384.85  397.07  395.73
v_f      |    0.00   -0.00    0.01 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.09    0.11
r_i      |  -57.49   14.37  -56.30 |  711.51  639.61  756.01 |-1360.39-1282.85-1307.23 | 1289.68 1275.95 1254.06
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.10    0.10
norm_rf  |    0.16 |    0.07 |    0.02 |    0.49
norm_vf  |    0.08 |    0.01 |    0.03 |    0.12
gs_f     |    1.45 |    3.37 |    0.01 |   45.88
thrust   |    0.00    0.00   -0.00 |    0.65    0.68    0.66 |   -3.44   -3.24   -3.39 |    3.44    3.39    3.45
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.53 |    0.18 |    1.12 |    2.19
rewards  |  -17.07 

ADVA:  (18783,) (35016,) 0.5364119259766964
ADV1:  0.00033261586857456955 0.00025385648369805237 0.007946325770306654 0.07804444154612944 -0.060776906852517076
ADVB:  (19648,) (35016,) 0.5611149188942198
ADV2:  0.12846227889646106 0.40424555167623333 0.5506093368178799 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9862   0.3363   1.8153 149.7444 133.1056  53.3563
***** Episode 59821, Mean R = -16.2  Std R = 4.8  Min R = -30.3
PolicyLoss: 2.26
Policy_Entropy: 0.211
Policy_KL: 0.00796
Policy_SD: 0.542
Steps: 1.18e+04
TotalSteps: 2.19e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00253


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0009   8.4490   2.3854   3.1146
ADVA:  (18532,) (35180,) 0.5267765776009096
ADV1:  0.0 -5.025709866413937e-06 0.00729662475607006 0.05635932134793997 -0.060776906852517076
ADVB:  (19459,) (35180,) 0.55

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0009   8.4490   2.3854   3.1146
ADVA:  (18212,) (35265,) 0.5164327236636892
ADV1:  0.00020351413273588057 -0.0001358876643793878 0.007356816302759295 0.10712407011567537 -0.06590669763782825
ADVB:  (20117,) (35265,) 0.5704522898057565
ADV2:  0.13382533413630365 0.3638588033534721 0.5013487138019752 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8072   0.2725   1.3616 149.7444 133.1056  53.3563
***** Episode 60069, Mean R = -17.9  Std R = 5.9  Min R = -35.4
PolicyLoss: 2.01
Policy_Entropy: 0.211
Policy_KL: 0.00746
Policy_SD: 0.542
Steps: 1.17e+04
TotalSteps: 2.2e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00273


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0002   0.0009   8.4490   2.3854   3.1146
ADVA:  (18270,) (35530,) 0.5142133408387278
ADV1:  0.000181695781987401 -0.00014930738978863057 0.008199901828541205 0.10712407011567537 -0.07296731194279651
ADVB

***** Episode 60286, Mean R = -17.8  Std R = 6.5  Min R = -36.9
PolicyLoss: 2.03
Policy_Entropy: 0.211
Policy_KL: 0.0109
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 2.21e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00298


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0012   0.0044   8.4490   2.3854   3.1146
ADVA:  (20528,) (35257,) 0.5822389880023825
ADV1:  0.00057879202615564 0.000183628476551323 0.00808453524530524 0.07856320622181462 -0.0784639036242043
ADVB:  (20012,) (35257,) 0.5676035964489321
ADV2:  0.1144407075027032 0.3402690494580095 0.48501996458596125 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.3041   2.6552   9.3205 149.7444 133.1056  53.3563
***** Episode 60317, Mean R = -15.6  Std R = 4.2  Min R = -26.4
PolicyLoss: 1.88
Policy_Entropy: 0.211
Policy_KL: 0.00669
Policy_SD: 0.541
Steps: 1.15e+04
TotalSteps: 2.21e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00273


ValFun  Gradients: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.3072   2.6854  10.1065 149.7444 133.1056  53.3563
***** Episode 60534, Mean R = -17.3  Std R = 5.2  Min R = -30.3
PolicyLoss: 1.95
Policy_Entropy: 0.211
Policy_KL: 0.00813
Policy_SD: 0.549
Steps: 1.16e+04
TotalSteps: 2.22e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.0039


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0003   0.0013   8.4490   2.3854   3.1146
ADVA:  (19015,) (34900,) 0.5448424068767909
ADV1:  0.0006972562908474228 0.0003887501707241995 0.007792374312302268 0.05666490938450969 -0.09747027071460734
ADVB:  (20948,) (34900,) 0.6002292263610315
ADV2:  0.18376661276830628 0.3981189495987179 0.5219373013663005 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   9.0301   3.8955  16.1426 149.7444 133.1056  53.3563
***** Episode 60565, Mean R = -15.7  Std R = 5.1  Min R = -33.9
PolicyLoss: 2.08
Policy_Entropy: 0.211
Policy_KL: 0.00949
Policy_SD: 0.549
Steps: 1.18e+04
TotalStep

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |    0.04   -0.10 |    0.70    1.86 |   -1.55   -3.13 |    1.53    3.12
w_f      |    0.01    0.00   -0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.03 |    0.04    0.03    0.02
w_rewards |   -0.19 |    0.15 |   -0.75 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.89
seeker_angles |   -0.00    0.00 |    0.08    0.08 |   -0.99   -0.95 |    0.98    1.00
cs_angles | -0.0020  0.0033 |  0.0825  0.0774 | -0.9881 -0.9508 |  0.9755  0.9965
optical_flow | -0.0000 -0.0000 |  0.0197  0.0205 | -0.9157 -0.8678 |  0.8921  1.0890
v_err    | -0.0118 |  0.0613 | -0.4614 |  0.1073
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.07 |    0.03
tracking_rewards |  -21.63 |    4.61 |  -42.25 |  -14.32
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1298   0.3746   1.9404 149.7444 133.1056  53.3563
Update Cnt = 1970    ET =   1421.8   Stats:  Mean, Std, Min, Max
r_f      |   -1.67   -8.21    4.12 |  179.16  170.65  205.62 | -378.22 -384.81 -371.46 |  370.72  375.40  389.67
v_f      |    0.00    0.00    0.00 |    0.04    0.05    0.05 |   -0.10   -0.10   -0.10 |    0.10    0.10    0.09
r_i      |   -6.02  -32.86  -11.04 |  664.12  670.76  777.35 |-1244.43-1299.88-1331.53 | 1326.68 1318.72 1316.03
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.10    0.09
norm_rf  |    0.17 |    0.07 |    0.01 |    0.39
norm_vf  |    0.08 |    0.02 |    0.03 |    0.13
gs_f     |    1.40 |    1.99 |    0.01 |   15.85
thrust   |    0.00    0.00   -0.00 |    0.66    0.67    0.67 |   -3.45   -3.44   -3.44 |    3.18    3.41    3.36
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.56 |    0.19 |    1.17 |    2.21
rewards  |  -17.50 

ADVA:  (22221,) (34695,) 0.6404669260700389
ADV1:  0.0018829215687131869 0.0001989387810330436 0.01086101011019412 0.07071346927483135 -0.12011861732133039
ADVB:  (21184,) (34695,) 0.6105778930681655
ADV2:  0.17481123763709322 0.3382679534829563 0.4224440073412977 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9160   0.3305   1.6414 149.7444 133.1056  53.3563
***** Episode 61371, Mean R = -17.9  Std R = 6.8  Min R = -37.2
PolicyLoss: 1.74
Policy_Entropy: 0.213
Policy_KL: 0.00569
Policy_SD: 0.545
Steps: 1.15e+04
TotalSteps: 2.25e+07
VF_0_ExplainedVarNew: 0.978
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.00399


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0036   0.0022   0.0078   8.4490   2.3854   3.1146
ADVA:  (23185,) (34852,) 0.6652415930219213
ADV1:  0.001982167156840896 0.0002003949350466911 0.010560035266403672 0.07455324974011551 -0.12011861732133039
ADVB:  (20970,) (34852,) 0.6016871341673362
ADV2:  0.16740311493164423 0.3221383631210737 0.4032203210956997 3.

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0028   8.4490   2.3854   3.1146
ADVA:  (21333,) (35170,) 0.606568097810634
ADV1:  0.0 -0.0005034185216883442 0.008613302132847325 0.04961738029013585 -0.07061107960315094
ADVB:  (16137,) (35170,) 0.458828547057151
ADV2:  0.0 0.283762080079107 0.48224393905553664 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.3318   1.4048   6.7887 149.7444 133.1056  53.3563
***** Episode 61619, Mean R = -16.5  Std R = 6.0  Min R = -35.9
PolicyLoss: 1.93
Policy_Entropy: 0.212
Policy_KL: 0.00862
Policy_SD: 0.542
Steps: 1.16e+04
TotalSteps: 2.26e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00368


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0002   0.0009   8.4490   2.3854   3.1146
ADVA:  (21279,) (35215,) 0.6042595484878602
ADV1:  2.1837108845580364e-05 -0.00037655867612557326 0.009164776450328552 0.04961738029013585 -0.07061107960315094
ADVB:  (18023,) (35215,) 0.51179894931

***** Episode 61836, Mean R = -16.9  Std R = 4.4  Min R = -28.2
PolicyLoss: 2.15
Policy_Entropy: 0.212
Policy_KL: 0.00751
Policy_SD: 0.544
Steps: 1.15e+04
TotalSteps: 2.27e+07
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.0038


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0045   0.0027   0.0097   8.4490   2.3854   3.1146
ADVA:  (19762,) (35064,) 0.5635979922427561
ADV1:  0.0 -0.0004524104051841535 0.008381289984099746 0.047036072213982594 -0.06600110641662521
ADVB:  (18473,) (35064,) 0.5268366415697011
ADV2:  0.05119870546264938 0.32749562299216467 0.47686220373007043 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7563   0.7381   3.0643 149.7444 133.1056  53.3563
***** Episode 61867, Mean R = -17.2  Std R = 5.9  Min R = -33.6
PolicyLoss: 1.94
Policy_Entropy: 0.213
Policy_KL: 0.00585
Policy_SD: 0.547
Steps: 1.18e+04
TotalSteps: 2.27e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.00349


ValFun  Gradients: u/sd/Max

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  14.5296  10.2830  37.9169 149.7444 133.1056  53.3563
***** Episode 62084, Mean R = -17.8  Std R = 6.2  Min R = -32.4
PolicyLoss: 2.14
Policy_Entropy: 0.213
Policy_KL: 0.0134
Policy_SD: 0.545
Steps: 1.18e+04
TotalSteps: 2.28e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00309


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0027   8.4490   2.3854   3.1146
ADVA:  (18620,) (35400,) 0.5259887005649717
ADV1:  0.0 -0.00014915882005548842 0.007863312509991754 0.07499787745111164 -0.0655036887841598
ADVB:  (19910,) (35400,) 0.5624293785310734
ADV2:  0.11282661393697252 0.3654636613444353 0.520386539776633 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.7384   3.2558  13.1958 149.7444 133.1056  53.3563
***** Episode 62115, Mean R = -16.5  Std R = 3.6  Min R = -25.7
PolicyLoss: 2.04
Policy_Entropy: 0.213
Policy_KL: 0.0143
Policy_SD: 0.541
Steps: 1.18e+04
TotalSteps: 2.28e+07
VF_0_Ex

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.07   -0.05 |    0.06    0.05    0.07
a_f      |   -0.06    0.02 |    0.65    1.80 |   -1.50   -3.13 |    1.44    3.13
w_f      |    0.01    0.00   -0.01 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.04 |    0.03    0.02    0.01
w_rewards |   -0.19 |    0.13 |   -0.78 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.30 |    0.00 |    2.50
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -1.00 |    1.00    0.94
cs_angles |  0.0014  0.0045 |  0.0827  0.0833 | -0.9824 -0.9996 |  0.9957  0.9390
optical_flow | -0.0000  0.0001 |  0.0220  0.0218 | -0.9844 -1.0158 |  1.1249  1.2138
v_err    | -0.0111 |  0.0603 | -0.4527 |  0.1187
landing_rewards |    9.29 |    2.57 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.07 |    0.02
tracking_rewards |  -22.18 |    4.78 |  -52.80 |  -13.88
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.8353   0.6279   2.9737 149.7444 133.1056  53.3563
Update Cnt = 2020    ET =   1457.4   Stats:  Mean, Std, Min, Max
r_f      |    4.99    7.45    5.25 |  189.02  169.67  197.59 | -375.41 -374.95 -396.50 |  394.97  380.03  363.63
v_f      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.11   -0.10 |    0.12    0.12    0.11
r_i      |   39.24   39.53   42.86 |  706.37  625.35  761.15 |-1322.53-1295.52-1244.40 | 1369.00 1255.65 1342.56
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.09    0.09
norm_rf  |    0.16 |    0.07 |    0.04 |    0.44
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs_f     |    1.33 |    2.34 |    0.00 |   27.39
thrust   |   -0.00    0.00   -0.00 |    0.65    0.68    0.66 |   -3.39   -3.46   -3.40 |    3.32    3.09    3.42
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.54 |    0.18 |    1.20 |    2.71
rewards  |  -16.80 

ADVA:  (22259,) (35232,) 0.6317836058128974
ADV1:  0.0009686934754132483 -0.0005305288678463282 0.009972200253206826 0.05051850323483059 -0.07223106235601387
ADVB:  (20014,) (35232,) 0.5680631244323342
ADV2:  0.11899382423953124 0.30505723465040435 0.4125656317411147 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5298   0.6996   3.3425 149.7444 133.1056  53.3563
***** Episode 62921, Mean R = -19.4  Std R = 7.0  Min R = -35.3
PolicyLoss: 1.67
Policy_Entropy: 0.215
Policy_KL: 0.00881
Policy_SD: 0.541
Steps: 1.18e+04
TotalSteps: 2.31e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.00317


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0015   0.0054   8.4490   2.3854   3.1146
ADVA:  (22776,) (35443,) 0.6426092599384928
ADV1:  0.0013656907013095375 -2.9510719575435008e-05 0.009872423924964888 0.06618248209198696 -0.07223106235601387
ADVB:  (20136,) (35443,) 0.5681234658465706
ADV2:  0.12358208274252243 0.31751409999749824 0.419442747461

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0014   0.0048   8.4490   2.3854   3.1146
ADVA:  (21468,) (35279,) 0.6085206496782789
ADV1:  0.0013697580528602912 0.000356035257658268 0.008713151128561872 0.1371330534723827 -0.12606721307940438
ADVB:  (22189,) (35279,) 0.6289577368973044
ADV2:  0.22286294510903434 0.3720354292308879 0.4513333222573606 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0922   0.4609   2.0595 149.7444 133.1056  53.3563
***** Episode 63169, Mean R = -15.6  Std R = 5.1  Min R = -33.0
PolicyLoss: 1.85
Policy_Entropy: 0.215
Policy_KL: 0.00789
Policy_SD: 0.543
Steps: 1.19e+04
TotalSteps: 2.32e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.0037


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0027   8.4490   2.3854   3.1146
ADVA:  (21909,) (35723,) 0.613302354225569
ADV1:  0.0009859087227890636 0.00038772429422410666 0.008408157681076972 0.1371330534723827 -0.12606721307940438
ADVB:  (20

***** Episode 63386, Mean R = -17.9  Std R = 5.2  Min R = -30.6
PolicyLoss: 1.66
Policy_Entropy: 0.213
Policy_KL: 0.00932
Policy_SD: 0.556
Steps: 1.16e+04
TotalSteps: 2.32e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00365


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0006   0.0026   8.4490   2.3854   3.1146
ADVA:  (20732,) (34808,) 0.595610204550678
ADV1:  0.001251547613091217 0.0003023162330587673 0.00887391734930787 0.059197115551583135 -0.1033845797092684
ADVB:  (21756,) (34808,) 0.6250287290278097
ADV2:  0.21028663549083967 0.3758750681465896 0.47465780431848414 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.5609   1.0452   4.8893 149.7444 133.1056  53.3563
***** Episode 63417, Mean R = -16.7  Std R = 6.3  Min R = -35.8
PolicyLoss: 1.88
Policy_Entropy: 0.213
Policy_KL: 0.00662
Policy_SD: 0.552
Steps: 1.15e+04
TotalSteps: 2.33e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00326


ValFun  Gradien

 *** BROKE ***   3 0.9065329432487488
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  46.7449  42.7144  94.4214 149.7444 133.1056  53.3563
***** Episode 63634, Mean R = -18.6  Std R = 8.5  Min R = -43.0
PolicyLoss: 1.82
Policy_Entropy: 0.211
Policy_KL: 0.907
Policy_SD: 0.562
Steps: 1.14e+04
TotalSteps: 2.33e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00385


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0019   8.4490   2.3854   3.1146
ADVA:  (20778,) (34773,) 0.5975325683720127
ADV1:  0.00047944777778112426 -0.0002753283070835952 0.008976779756066278 0.057715322089665066 -0.1340448488619873
ADVB:  (19554,) (34773,) 0.5623328444482788
ADV2:  0.10056245518286382 0.3076498957429292 0.45152718563452166 3.0 0.0
 *** BROKE ***   1 1.053467869758606
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  76.7068   0.2421  76.9489 149.7444 133.1056  53.3563
***** Episode 63665, Mean R = -15.7  Std R = 4.3  Min R = -28.6
PolicyLoss: 1.98
Policy

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |    0.07    0.05 |    0.61    1.96 |   -1.47   -3.13 |    1.42    3.12
w_f      |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.03   -0.02   -0.03 |    0.02    0.02    0.02
w_rewards |   -0.20 |    0.14 |   -0.65 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.30 |    0.00 |    2.57
seeker_angles |    0.00    0.00 |    0.09    0.08 |   -0.98   -0.99 |    1.00    0.99
cs_angles |  0.0007  0.0035 |  0.0871  0.0825 | -0.9767 -0.9885 |  0.9985  0.9934
optical_flow |  0.0001  0.0000 |  0.0210  0.0203 | -0.9432 -1.1776 |  1.0831  1.0104
v_err    | -0.0111 |  0.0606 | -0.4551 |  0.1235
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -22.37 |    5.24 |  -46.91 |  -13.60
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6378   0.5737   3.2958 149.7444 133.1056  53.3563
Update Cnt = 2070    ET =   1514.7   Stats:  Mean, Std, Min, Max
r_f      |   11.25  -10.03   -8.77 |  186.80  173.56  195.68 | -381.37 -389.64 -391.59 |  383.66  390.63  385.61
v_f      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.08   -0.09   -0.10 |    0.08    0.09    0.10
r_i      |   42.15  -36.65  -45.71 |  668.80  667.68  765.58 |-1322.87-1270.18-1248.53 | 1362.62 1250.68 1317.59
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.10    0.09
norm_rf  |    0.15 |    0.07 |    0.02 |    0.40
norm_vf  |    0.08 |    0.01 |    0.02 |    0.12
gs_f     |    1.33 |    2.52 |    0.01 |   38.52
thrust   |   -0.01   -0.00    0.01 |    0.65    0.67    0.66 |   -3.41   -3.40   -3.21 |    3.41    3.41    3.43
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.53 |    0.16 |    1.07 |    2.18
rewards  |  -16.72 

ADVA:  (21867,) (35044,) 0.6239869877867823
ADV1:  0.0013275256841837023 0.00020317110238909792 0.008906188956868481 0.06899426753156401 -0.07100244341541849
ADVB:  (21662,) (35044,) 0.6181371989498916
ADV2:  0.2067196821434032 0.36084473673382167 0.444374651346105 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7900   0.2560   1.3016 149.7444 133.1056  53.3563
***** Episode 64471, Mean R = -17.7  Std R = 5.4  Min R = -32.3
PolicyLoss: 1.81
Policy_Entropy: 0.215
Policy_KL: 0.00522
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 2.37e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.981
VF_0_Loss : 0.00372


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0011   0.0039   8.4490   2.3854   3.1146
ADVA:  (21209,) (35300,) 0.6008215297450424
ADV1:  0.0009003320719129185 -0.00023238376227371782 0.008766729271915533 0.056877606259762126 -0.07100244341541849
ADVB:  (21201,) (35300,) 0.6005949008498583
ADV2:  0.16751100859180423 0.33837594873314736 0.42852948988706

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0015   8.4490   2.3854   3.1146
ADVA:  (19200,) (35177,) 0.5458111834437275
ADV1:  0.00032421526842601675 9.049429986693496e-06 0.0070106820791138716 0.0666145382753679 -0.07990984910711058
ADVB:  (20064,) (35177,) 0.5703726866986951
ADV2:  0.13948380799633037 0.38332995038257034 0.5153041021501248 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9703   0.3465   1.6934 149.7444 133.1056  53.3563
***** Episode 64719, Mean R = -16.0  Std R = 5.8  Min R = -38.1
PolicyLoss: 2.08
Policy_Entropy: 0.215
Policy_KL: 0.0087
Policy_SD: 0.535
Steps: 1.17e+04
TotalSteps: 2.37e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00347


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0009   0.0030   8.4490   2.3854   3.1146
ADVA:  (19840,) (35084,) 0.5654999429939573
ADV1:  0.0 -0.0006727260444410548 0.008241500717163059 0.07532461645089117 -0.09944420653927605
ADVB:  (19464,) (35084,

***** Episode 64936, Mean R = -17.4  Std R = 5.9  Min R = -32.1
PolicyLoss: 2.27
Policy_Entropy: 0.214
Policy_KL: 0.00659
Policy_SD: 0.54
Steps: 1.18e+04
TotalSteps: 2.38e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00392


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0010   8.4490   2.3854   3.1146
ADVA:  (20564,) (35164,) 0.5848026390626777
ADV1:  9.034510888168354e-06 -0.0003053897026035581 0.0075198307528622655 0.04947617475646271 -0.06250127192528576
ADVB:  (17912,) (35164,) 0.5093845978841998
ADV2:  0.015535472250612014 0.30868630755185467 0.48325109391400345 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8517   0.3255   1.6275 149.7444 133.1056  53.3563
***** Episode 64967, Mean R = -16.9  Std R = 6.5  Min R = -43.0
PolicyLoss: 1.89
Policy_Entropy: 0.214
Policy_KL: 0.00648
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 2.38e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00411


ValFun  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8379   0.3137   1.4964 149.7444 133.1056  53.3563
***** Episode 65184, Mean R = -17.6  Std R = 5.8  Min R = -29.1
PolicyLoss: 1.9
Policy_Entropy: 0.216
Policy_KL: 0.00742
Policy_SD: 0.538
Steps: 1.19e+04
TotalSteps: 2.39e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00378


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0003   0.0010   8.4490   2.3854   3.1146
ADVA:  (19959,) (35378,) 0.5641641698230538
ADV1:  0.0005195576449968584 -8.976715761481978e-05 0.008057766449518831 0.04534931988170088 -0.07070468162764937
ADVB:  (20252,) (35378,) 0.572446152976426
ADV2:  0.1455323815194553 0.3600940341588737 0.4774739235029005 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0242   0.4409   2.2785 149.7444 133.1056  53.3563
***** Episode 65215, Mean R = -17.1  Std R = 6.7  Min R = -35.5
PolicyLoss: 1.95
Policy_Entropy: 0.215
Policy_KL: 0.00658
Policy_SD: 0.541
Steps: 1.18e+04
TotalSteps

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |    0.00    0.04 |    0.66    1.87 |   -1.44   -3.13 |    1.47    3.13
w_f      |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.02    0.02
w_rewards |   -0.16 |    0.13 |   -0.70 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.62
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -1.00   -0.97 |    0.97    0.99
cs_angles |  0.0026  0.0038 |  0.0841  0.0818 | -0.9975 -0.9730 |  0.9664  0.9870
optical_flow |  0.0001  0.0002 |  0.0214  0.0207 | -1.1695 -1.1018 |  1.2627  1.0596
v_err    | -0.0107 |  0.0603 | -0.4533 |  0.0949
landing_rewards |    9.52 |    2.15 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -22.07 |    4.77 |  -38.07 |  -13.97
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4502   0.5279   2.5275 149.7444 133.1056  53.3563
Update Cnt = 2120    ET =   1405.5   Stats:  Mean, Std, Min, Max
r_f      |   -8.43   10.20    1.60 |  181.58  179.97  200.56 | -377.54 -395.84 -381.68 |  391.85  371.82  395.33
v_f      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.11   -0.10   -0.08 |    0.10    0.10    0.08
r_i      |  -49.37   31.56   -7.19 |  690.04  679.11  747.39 |-1350.60-1306.11-1304.45 | 1313.37 1337.87 1306.47
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.08    0.10
norm_rf  |    0.16 |    0.07 |    0.02 |    0.40
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.15 |    1.38 |    0.00 |    9.15
thrust   |   -0.00    0.00   -0.00 |    0.66    0.67    0.66 |   -3.44   -3.39   -3.33 |    3.36    3.45    3.45
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.52 |    0.17 |    1.02 |    2.12
rewards  |  -17.33 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7304   0.2353   1.1442 149.7444 133.1056  53.3563
***** Episode 66021, Mean R = -18.5  Std R = 4.8  Min R = -26.8
PolicyLoss: 2.24
Policy_Entropy: 0.216
Policy_KL: 0.00647
Policy_SD: 0.541
Steps: 1.16e+04
TotalSteps: 2.42e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00371


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0006   0.0027   8.4490   2.3854   3.1146
ADVA:  (19443,) (34993,) 0.555625410796445
ADV1:  0.0011886999800735862 0.0004623993371761781 0.009124122675946054 0.1341927162343972 -0.19323477510477527
ADVB:  (22123,) (34993,) 0.6322121567170577
ADV2:  0.25996400977785816 0.42842321834508307 0.516613910701287 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1080   0.4571   2.2368 149.7444 133.1056  53.3563
***** Episode 66052, Mean R = -18.3  Std R = 7.6  Min R = -48.6
PolicyLoss: 2.11
Policy_Entropy: 0.215
Policy_KL: 0.00875
Policy_SD: 0.54
Steps: 1.16e+04
TotalSteps: 

ADVA:  (20046,) (35251,) 0.5686647187313835
ADV1:  0.0005027612690794159 0.00019132899509008127 0.007449549196383417 0.04487021246028233 -0.07289600824093234
ADVB:  (20535,) (35251,) 0.5825366656265071
ADV2:  0.15653124442195346 0.39219427607765867 0.5274651960170652 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0530   0.4455   2.0550 149.7444 133.1056  53.3563
***** Episode 66269, Mean R = -16.9  Std R = 5.6  Min R = -31.4
PolicyLoss: 2.08
Policy_Entropy: 0.216
Policy_KL: 0.00757
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 2.43e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00399


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0014   0.0049   8.4490   2.3854   3.1146
ADVA:  (19905,) (34923,) 0.5699682157890216
ADV1:  0.000600773194609952 0.00019419570470048718 0.007205234162709151 0.04481329793255273 -0.06306645169413828
ADVB:  (20488,) (34923,) 0.5866620851587779
ADV2:  0.15870776818324647 0.3801059037415804 0.507445103645922

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0016   8.4490   2.3854   3.1146
ADVA:  (17454,) (34972,) 0.49908498227153153
ADV1:  0.0008782317701566955 0.0010384309912471969 0.008397264984854727 0.06836433935576897 -0.05534195922203575
ADVB:  (22142,) (34972,) 0.6331350794921652
ADV2:  0.31377623918796144 0.5542131797143492 0.6581667552159541 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.0416   3.3255  10.5595 149.7444 133.1056  53.3563
***** Episode 66517, Mean R = -14.7  Std R = 5.4  Min R = -30.7
PolicyLoss: 2.73
Policy_Entropy: 0.216
Policy_KL: 0.013
Policy_SD: 0.532
Steps: 1.18e+04
TotalSteps: 2.44e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00325


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0017   0.0064   8.4490   2.3854   3.1146
ADVA:  (20767,) (35192,) 0.5901057058422369
ADV1:  0.0 -0.001606288214573304 0.008335339096911002 0.07069892826716634 -0.08265099435897039
ADVB:  (16106,) (35192,) 0

***** Episode 66734, Mean R = -16.5  Std R = 4.9  Min R = -30.7
PolicyLoss: 2.35
Policy_Entropy: 0.216
Policy_KL: 0.00978
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 2.45e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00329


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0038   0.0023   0.0096   8.4490   2.3854   3.1146
ADVA:  (19368,) (35556,) 0.5447181910226122
ADV1:  0.0 -0.00044057717622042995 0.009466257064100073 0.04329746590225797 -0.07347029107275205
ADVB:  (19399,) (35556,) 0.5455900551243109
ADV2:  0.10242663836534832 0.40206025060983736 0.5548328909431038 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  16.3671  17.8287  56.3271 149.7444 133.1056  53.3563
***** Episode 66765, Mean R = -18.1  Std R = 7.4  Min R = -42.5
PolicyLoss: 2.3
Policy_Entropy: 0.216
Policy_KL: 0.0137
Policy_SD: 0.534
Steps: 1.2e+04
TotalSteps: 2.45e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00312


ValFun  Gradients: u/sd/Max/C M

seeker_angles |    0.01    0.00 |    0.08    0.08 |   -0.98   -0.92 |    0.99    0.99
cs_angles |  0.0061  0.0044 |  0.0818  0.0787 | -0.9806 -0.9226 |  0.9904  0.9915
optical_flow |  0.0001  0.0001 |  0.0188  0.0200 | -1.0220 -1.1558 |  1.2726  1.1015
v_err    | -0.0106 |  0.0602 | -0.4533 |  0.1473
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -21.78 |    4.39 |  -37.08 |  -13.07
steps    |     380 |      20 |     333 |     422
***** Episode 67013, Mean R = -17.0  Std R = 5.0  Min R = -25.9
PolicyLoss: 1.92
Policy_Entropy: 0.217
Policy_KL: 0.00677
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps: 2.46e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00329


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0046   8.4490   2.3854   3.1146
ADVA:  (19566,) (35202,) 0.555820692006136
ADV1:  0.0010790717747849628 0.0005018217217814139 0.008161410382951569 0.0498325

thrust   |    0.00   -0.00    0.01 |    0.65    0.67    0.66 |   -3.39   -3.40   -3.39 |    3.44    3.41    3.43
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.49 |    0.16 |    1.04 |    2.10
rewards  |  -16.81 |    5.58 |  -36.57 |   -7.16
fuel_rewards |   -4.28 |    0.47 |   -6.02 |   -2.97
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    2.97 |   13.44 |    0.01 |  233.84
norm_af  |    1.73 |    0.88 |    0.04 |    3.37
norm_wf  |    0.01 |    0.01 |    0.00 |    0.04
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |   -0.04   -0.03   -0.13 |    1.15    0.66    1.83 |   -3.14   -1.56   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.07   -0.05 |    0.07    0.05    0.06
a_f      |   -0.04   -0.14 |    0.66

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.6259   2.9090  12.5379 149.7444 133.1056  53.3563
***** Episode 67602, Mean R = -17.2  Std R = 4.9  Min R = -33.9
PolicyLoss: 2.69
Policy_Entropy: 0.216
Policy_KL: 0.00901
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 2.48e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00312


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   8.4490   2.3854   3.1146
ADVA:  (21926,) (35248,) 0.6220494779845664
ADV1:  0.0 -0.0005676316114182529 0.007502775611685494 0.047888003757412645 -0.0604298239605993
ADVB:  (17022,) (35248,) 0.48292101679527916
ADV2:  0.0 0.26652221404004234 0.42128204167668326 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9514   0.7633   3.7337 149.7444 133.1056  53.3563
Update Cnt = 2180    ET =   1466.6   Stats:  Mean, Std, Min, Max
r_f      |    9.33   -5.01   -6.80 |  179.67  170.79  197.66 | -390.83 -372.28 -386.81 |  379.38  380.24  377.64
v_f      |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.4460   1.0204   5.2854 149.7444 133.1056  53.3563
***** Episode 67850, Mean R = -18.8  Std R = 5.1  Min R = -27.5
PolicyLoss: 2.27
Policy_Entropy: 0.216
Policy_KL: 0.0106
Policy_SD: 0.545
Steps: 1.18e+04
TotalSteps: 2.49e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00354


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0055   8.4490   2.3854   3.1146
ADVA:  (18736,) (35168,) 0.5327570518653321
ADV1:  0.0010640468341989884 0.0007550916746849544 0.008434303612368715 0.05427006431965753 -0.07823775920197906
ADVB:  (22589,) (35168,) 0.6423168789808917
ADV2:  0.2964604539605442 0.4937896196372918 0.5832083942299129 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8269   0.3022   1.4249 149.7444 133.1056  53.3563
***** Episode 67881, Mean R = -16.9  Std R = 6.5  Min R = -33.3
PolicyLoss: 2.38
Policy_Entropy: 0.217
Policy_KL: 0.00522
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps

ADVA:  (20029,) (35204,) 0.568941029428474
ADV1:  2.7990175632186184e-05 9.912181839284022e-05 0.007246226088665518 0.06329094125434787 -0.05424042702764207
ADVB:  (17551,) (35204,) 0.498551300988524
ADV2:  0.0 0.3740741273798559 0.5982851711318441 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  13.8414   8.9040  37.2353 149.7444 133.1056  53.3563
***** Episode 68098, Mean R = -16.1  Std R = 4.5  Min R = -25.1
PolicyLoss: 2.33
Policy_Entropy: 0.217
Policy_KL: 0.00758
Policy_SD: 0.538
Steps: 1.17e+04
TotalSteps: 2.5e+07
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00278


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0019   0.0071   8.4490   2.3854   3.1146
ADVA:  (21515,) (35186,) 0.6114647871312454
ADV1:  0.0 -0.0015010821640814753 0.007884198753730266 0.05482072356740747 -0.05639553007080636
ADVB:  (15165,) (35186,) 0.43099528221451716
ADV2:  0.0 0.21873752772123065 0.39830298125204733 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0028   8.4490   2.3854   3.1146
ADVA:  (21932,) (34907,) 0.6282980491018993
ADV1:  0.0 -0.0009727376901953036 0.009584872937011208 0.0655516419723754 -0.08005098348493161
ADVB:  (16717,) (34907,) 0.4789010800126049
ADV2:  0.0 0.270975717100744 0.4603024401531746 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9178   0.8053   3.7855 149.7444 133.1056  53.3563
***** Episode 68346, Mean R = -17.1  Std R = 7.3  Min R = -44.2
PolicyLoss: 1.74
Policy_Entropy: 0.217
Policy_KL: 0.00739
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 2.51e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00314


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0014   8.4490   2.3854   3.1146
ADVA:  (19912,) (35055,) 0.5680216802168022
ADV1:  0.0008130166058209071 0.00026426888733817566 0.008163882172365309 0.05629960078835655 -0.0769923861703754
ADVB:  (21042,) (35055,) 0.6002567394094993

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0009   0.0035   8.4490   2.3854   3.1146
ADVA:  (19058,) (34895,) 0.5461527439461241
ADV1:  5.23418650884358e-05 6.95050132074392e-05 0.007127186253411385 0.06378753648981855 -0.06762571497176867
ADVB:  (19151,) (34895,) 0.5488178822180828
ADV2:  0.09785663014033934 0.37653541401757307 0.5264028361585519 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0953   0.4527   2.0012 149.7444 133.1056  53.3563
***** Episode 68594, Mean R = -16.2  Std R = 5.0  Min R = -28.9
PolicyLoss: 2.12
Policy_Entropy: 0.218
Policy_KL: 0.00515
Policy_SD: 0.534
Steps: 1.16e+04
TotalSteps: 2.52e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.0027


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0034   8.4490   2.3854   3.1146
ADVA:  (17914,) (35051,) 0.5110838492482382
ADV1:  0.0 -0.00026930486778

attitude |    0.09    0.02    0.11 |    1.22    0.67    1.87 |   -3.14   -1.56   -3.14 |    3.14    1.55    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.07   -0.05 |    0.08    0.05    0.08
a_f      |    0.03    0.06 |    0.68    1.89 |   -1.39   -3.11 |    1.47    3.13
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.03 |    0.03    0.02    0.01
w_rewards |   -0.23 |    0.17 |   -1.58 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.29 |    0.00 |    2.94
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.99 |    0.99    0.93
cs_angles |  0.0043  0.0012 |  0.0824  0.0789 | -0.9935 -0.9944 |  0.9939  0.9299
optical_flow | -0.0001  0.0001 |  0.0205  0.0206 | -1.1996 -1.0095 |  0.9676  0.9667
v_err    | -0.0108 |  0.0602 | -0.4544 |  0.1333
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8680   0.3699   1.5789 149.7444 133.1056  53.3563
Update Cnt = 2230    ET =   1327.5   Stats:  Mean, Std, Min, Max
r_f      |  -11.10   -8.73   -1.79 |  191.73  169.72  200.58 | -389.99 -361.44 -387.01 |  396.52  388.23  387.72
v_f      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.11   -0.09 |    0.09    0.10    0.11
r_i      |  -37.80   12.82  -22.57 |  702.76  654.91  747.56 |-1287.69-1365.37-1249.34 | 1354.86 1323.05 1303.47
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.09    0.10
norm_rf  |    0.15 |    0.07 |    0.01 |    0.54
norm_vf  |    0.08 |    0.01 |    0.03 |    0.12
gs_f     |    1.28 |    1.93 |    0.01 |   15.54
thrust   |    0.00   -0.00    0.00 |    0.65    0.67    0.66 |   -3.36   -3.42   -3.42 |    3.38    3.33    3.42
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.49 |    0.17 |    1.08 |    2.11
rewards  |  -16.86 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0650   0.4074   2.1508 149.7444 133.1056  53.3563
***** Episode 69431, Mean R = -16.6  Std R = 5.2  Min R = -29.5
PolicyLoss: 2.05
Policy_Entropy: 0.217
Policy_KL: 0.00837
Policy_SD: 0.541
Steps: 1.18e+04
TotalSteps: 2.55e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00298


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0027   8.4490   2.3854   3.1146
ADVA:  (18663,) (35228,) 0.529777449755876
ADV1:  0.0006164929552741061 0.0004898861372549038 0.007468169476502036 0.04894024958009957 -0.07367220014402309
ADVB:  (21018,) (35228,) 0.596627682525264
ADV2:  0.17264888884253105 0.42208947427690335 0.5570728668613624 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0372   0.3585   1.8904 149.7444 133.1056  53.3563
***** Episode 69462, Mean R = -15.6  Std R = 5.5  Min R = -28.6
PolicyLoss: 2.18
Policy_Entropy: 0.218
Policy_KL: 0.0076
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps:

ADVA:  (20525,) (35212,) 0.5828978757241849
ADV1:  0.0002932028985045843 4.608712227010941e-05 0.008198941353812243 0.05513447121687193 -0.07093033300091295
ADVB:  (19743,) (35212,) 0.5606895376576168
ADV2:  0.12134898005876074 0.40009907322453075 0.5478745329630065 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.3303   4.7912  19.4873 149.7444 133.1056  53.3563
***** Episode 69679, Mean R = -15.9  Std R = 5.1  Min R = -26.1
PolicyLoss: 2.2
Policy_Entropy: 0.218
Policy_KL: 0.0243
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 2.56e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00321


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0012   0.0049   8.4490   2.3854   3.1146
ADVA:  (20608,) (35198,) 0.5854878118074891
ADV1:  0.0004201599599608294 -0.00011599392322195533 0.008929743677006573 0.05513447121687193 -0.06766900110478263
ADVB:  (20002,) (35198,) 0.5682709244843457
ADV2:  0.1303466480712004 0.38040959534399843 0.5085874205764384 

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0005   0.0024   8.4490   2.3854   3.1146
ADVA:  (18303,) (35238,) 0.5194108632726034
ADV1:  0.0012003124940957672 0.0009007758155659928 0.007921504504903885 0.10641308865127247 -0.10683968556431012
ADVB:  (23152,) (35238,) 0.6570179919405188
ADV2:  0.31114403593346907 0.5016423895170363 0.5774791736394407 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4191   0.5292   2.5919 149.7444 133.1056  53.3563
***** Episode 69927, Mean R = -17.0  Std R = 5.0  Min R = -30.9
PolicyLoss: 2.34
Policy_Entropy: 0.22
Policy_KL: 0.00646
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 2.57e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00307


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0015   0.0054   8.4490   2.3854   3.1146
ADVA:  (21508,) (35336,) 0.6086710436948155
ADV1:  0.0 -0.0004835549392

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0020   8.4490   2.3854   3.1146
ADVA:  (18125,) (35151,) 0.5156325566840204
ADV1:  0.00023380359612021513 0.0002004376597045085 0.007384753507911875 0.09666308458125034 -0.10676119129751827
ADVB:  (20202,) (35151,) 0.5747204915934113
ADV2:  0.1529381260263075 0.4191275450349795 0.5702559356757362 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.9225   2.1861  10.1735 149.7444 133.1056  53.3563
***** Episode 70175, Mean R = -15.8  Std R = 4.4  Min R = -27.8
PolicyLoss: 2.24
Policy_Entropy: 0.219
Policy_KL: 0.00905
Policy_SD: 0.538
Steps: 1.16e+04
TotalSteps: 2.58e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00221


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0014   0.0051   8.4490   2.3854   3.1146
ADVA:  (19451,) (34851,) 0.5581188488135204
ADV1:  0.000363793155304633

seeker_angles |    0.00    0.00 |    0.09    0.08 |   -0.96   -0.97 |    0.96    0.99
cs_angles |  0.0016  0.0027 |  0.0856  0.0791 | -0.9620 -0.9670 |  0.9604  0.9915
optical_flow |  0.0001  0.0001 |  0.0212  0.0213 | -0.8384 -0.9436 |  1.2153  1.1865
v_err    | -0.0108 |  0.0598 | -0.4558 |  0.2383
landing_rewards |    9.61 |    1.93 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.07 |    0.02
tracking_rewards |  -21.75 |    4.98 |  -46.05 |  -14.20
steps    |     379 |      20 |     330 |     418
***** Episode 70423, Mean R = -16.0  Std R = 4.6  Min R = -28.7
PolicyLoss: 1.82
Policy_Entropy: 0.219
Policy_KL: 0.00901
Policy_SD: 0.535
Steps: 1.17e+04
TotalSteps: 2.59e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.0027


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0017   8.4490   2.3854   3.1146
ADVA:  (22517,) (35414,) 0.63582

attitude |    0.00    0.06   -0.12 |    1.10    0.67    1.77 |   -3.14   -1.53   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.06   -0.05 |    0.08    0.05    0.07
a_f      |    0.05   -0.16 |    0.67    1.77 |   -1.33   -3.11 |    1.52    3.11
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.02 |    0.03    0.02    0.02
w_rewards |   -0.22 |    0.16 |   -1.31 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.83
seeker_angles |    0.00    0.01 |    0.08    0.08 |   -1.00   -0.97 |    0.96    0.98
cs_angles |  0.0014  0.0051 |  0.0833  0.0820 | -0.9984 -0.9721 |  0.9620  0.9809
optical_flow |  0.0001  0.0000 |  0.0190  0.0194 | -0.9740 -0.9674 |  0.8237  1.1347
v_err    | -0.0110 |  0.0597 | -0.4530 |  0.1330
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7747   0.6310   2.9066 149.7444 133.1056  53.3563
Update Cnt = 2290    ET =   1289.6   Stats:  Mean, Std, Min, Max
r_f      |   -6.24   14.34   -2.03 |  184.67  165.51  194.20 | -395.51 -381.18 -384.03 |  382.42  383.68  383.60
v_f      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.08   -0.10   -0.10 |    0.08    0.09    0.09
r_i      |    3.34   37.18   -9.59 |  669.05  650.23  767.35 |-1354.89-1266.10-1337.12 | 1304.07 1362.64 1282.99
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.15 |    0.06 |    0.01 |    0.37
norm_vf  |    0.07 |    0.01 |    0.03 |    0.11
gs_f     |    1.93 |    8.67 |    0.00 |  143.67
thrust   |    0.00   -0.00   -0.00 |    0.65    0.66    0.66 |   -3.44   -3.41   -3.45 |    3.44    3.36    3.46
norm_thrust |    0.88 |    0.72 |    0.00 |    3.46
fuel     |    1.50 |    0.16 |    1.08 |    2.02
rewards  |  -16.37 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.9801   2.7242  14.1859 149.7444 133.1056  53.3563
***** Episode 71291, Mean R = -16.3  Std R = 5.2  Min R = -29.5
PolicyLoss: 1.94
Policy_Entropy: 0.218
Policy_KL: 0.0335
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 2.62e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00265


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0011   0.0039   8.4490   2.3854   3.1146
ADVA:  (19928,) (35406,) 0.5628424560808902
ADV1:  0.001525455324733721 0.0011982072386124496 0.007803515594727962 0.14197382121802862 -0.09036936281147595
ADVB:  (22204,) (35406,) 0.627125345986556
ADV2:  0.26929246966808806 0.468581792834826 0.5781848412963132 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.1544   1.2734   5.5934 149.7444 133.1056  53.3563
***** Episode 71322, Mean R = -16.0  Std R = 3.8  Min R = -25.6
PolicyLoss: 2.31
Policy_Entropy: 0.219
Policy_KL: 0.0254
Policy_SD: 0.531
Steps: 1.17e+04
TotalSteps: 2.

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0631   0.3701   1.9633 149.7444 133.1056  53.3563
***** Episode 71539, Mean R = -17.3  Std R = 5.0  Min R = -29.0
PolicyLoss: 2.06
Policy_Entropy: 0.219
Policy_KL: 0.0077
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 2.63e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00228


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0013   8.4490   2.3854   3.1146
ADVA:  (18612,) (35073,) 0.5306646138054915
ADV1:  2.058686623499298e-05 -0.00016056236480952476 0.0072721428105745126 0.06460195663711393 -0.10119865482407964
ADVB:  (19686,) (35073,) 0.5612864596698315
ADV2:  0.11562452128741302 0.37748249677990625 0.5402031930541017 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4045   0.5825   2.7941 149.7444 133.1056  53.3563
***** Episode 71570, Mean R = -16.9  Std R = 5.5  Min R = -28.6
PolicyLoss: 2.06
Policy_Entropy: 0.22
Policy_KL: 0.00718
Policy_SD: 0.535
Steps: 1.18e+04
TotalS

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1234   0.4058   2.1928 149.7444 133.1056  53.3563
***** Episode 71787, Mean R = -17.3  Std R = 8.0  Min R = -51.0
PolicyLoss: 1.58
Policy_Entropy: 0.219
Policy_KL: 0.00999
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 2.64e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.00251


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0011   0.0039   8.4490   2.3854   3.1146
ADVA:  (21791,) (35054,) 0.6216408969019227
ADV1:  0.0006406719092338468 -0.00042147713436513427 0.009397644902027753 0.08735885088582834 -0.13976119339421733
ADVB:  (19435,) (35054,) 0.554430307525532
ADV2:  0.0823151721968686 0.2998155956261176 0.42932744677373064 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7926   0.2517   1.2584 149.7444 133.1056  53.3563
***** Episode 71818, Mean R = -17.2  Std R = 7.5  Min R = -45.0
PolicyLoss: 1.65
Policy_Entropy: 0.219
Policy_KL: 0.0108
Policy_SD: 0.539
Steps: 1.15e+04
TotalSte

ADVA:  (20720,) (35313,) 0.586752753943307
ADV1:  0.0016456377585836674 0.0012541856191138274 0.00762312605187305 0.06730104746965171 -0.05691321978095115
ADVB:  (22569,) (35313,) 0.6391130745051398
ADV2:  0.28259962060204225 0.47555676036861966 0.5739901647623287 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2110   0.4532   2.1949 149.7444 133.1056  53.3563
***** Episode 72035, Mean R = -16.3  Std R = 5.6  Min R = -32.2
PolicyLoss: 2.28
Policy_Entropy: 0.22
Policy_KL: 0.0102
Policy_SD: 0.54
Steps: 1.19e+04
TotalSteps: 2.65e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00255


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0029   8.4490   2.3854   3.1146
ADVA:  (21368,) (35144,) 0.6080127475529251
ADV1:  0.0025608933742671675 0.002171819264111449 0.007070487907913971 0.06730104746965171 -0.05691321978095115
ADVB:  (23909,) (35144,) 0.6803152743000228
ADV2:  0.3699137764376343 0.5258427638648501 0.5903716471805778 3.0 0.

***** Episode 72283, Mean R = -18.5  Std R = 6.0  Min R = -39.3
PolicyLoss: 2
Policy_Entropy: 0.22
Policy_KL: 0.0104
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 2.66e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00288


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0017   8.4490   2.3854   3.1146
ADVA:  (19792,) (35182,) 0.5625604002046501
ADV1:  0.0008424607006384185 0.000387533561992403 0.007798038012188623 0.06943155215149066 -0.1455958660123
ADVB:  (20711,) (35182,) 0.5886817122392133
ADV2:  0.18054379697302098 0.40563637610180914 0.5325935094187257 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   9.1593   4.7399  18.0741 149.7444 133.1056  53.3563
***** Episode 72314, Mean R = -16.8  Std R = 5.8  Min R = -37.1
PolicyLoss: 2.11
Policy_Entropy: 0.22
Policy_KL: 0.00852
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 2.66e+07
VF_0_Ex

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |    0.03   -0.12 |    0.62    1.88 |   -1.28   -3.14 |    1.51    3.12
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.04 |    0.03    0.02    0.02
w_rewards |   -0.21 |    0.16 |   -0.70 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.29 |    0.00 |    2.81
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -1.00 |    1.00    0.98
cs_angles |  0.0001  0.0039 |  0.0817  0.0790 | -0.9769 -0.9965 |  0.9986  0.9841
optical_flow |  0.0001  0.0001 |  0.0206  0.0207 | -0.8801 -0.9678 |  1.5274  1.0898
v_err    | -0.0109 |  0.0598 | -0.4536 |  0.2088
landing_rewards |    9.68 |    1.77 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.07 |    0.01
tracking_rewards |  -21.19 |    5.05 |  -45.90 |  -13.26
steps    |     377 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.1122   0.7904   3.4928 149.7444 133.1056  53.3563
Update Cnt = 2350    ET =    771.7   Stats:  Mean, Std, Min, Max
r_f      |   -8.60   16.03    6.30 |  189.87  165.44  201.86 | -374.92 -375.29 -391.88 |  390.93  396.24  390.34
v_f      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.11   -0.08   -0.13 |    0.10    0.11    0.08
r_i      |    4.48   60.82    3.94 |  683.92  656.58  764.86 |-1310.06-1333.08-1319.51 | 1316.02 1287.11 1343.35
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.15 |    0.06 |    0.01 |    0.36
norm_vf  |    0.07 |    0.01 |    0.03 |    0.13
gs_f     |    1.32 |    1.90 |    0.01 |   15.99
thrust   |    0.00   -0.00    0.00 |    0.65    0.67    0.66 |   -3.37   -3.42   -3.43 |    3.43    3.37    3.45
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.52 |    0.16 |    1.15 |    2.07
rewards  |  -16.72 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9464   0.3108   1.4837 149.7444 133.1056  53.3563
***** Episode 73151, Mean R = -17.6  Std R = 5.6  Min R = -29.2
PolicyLoss: 1.79
Policy_Entropy: 0.22
Policy_KL: 0.0113
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps: 2.69e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00236


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0021   8.4490   2.3854   3.1146
ADVA:  (19080,) (35327,) 0.5400968098055312
ADV1:  0.0005200031798585532 6.527378654036939e-05 0.007991204650424135 0.07117850255585512 -0.08825187334127638
ADVB:  (21319,) (35327,) 0.6034760947717044
ADV2:  0.19869923726367802 0.40608327385570053 0.5265566969158073 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8058   0.3079   1.4902 149.7444 133.1056  53.3563
***** Episode 73182, Mean R = -15.9  Std R = 5.7  Min R = -30.1
PolicyLoss: 2.06
Policy_Entropy: 0.22
Policy_KL: 0.00698
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7745   0.2932   1.5541 149.7444 133.1056  53.3563
***** Episode 73399, Mean R = -16.3  Std R = 6.0  Min R = -30.3
PolicyLoss: 1.88
Policy_Entropy: 0.221
Policy_KL: 0.00657
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 2.7e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00254


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0023   8.4490   2.3854   3.1146
ADVA:  (19726,) (35217,) 0.560127211290002
ADV1:  0.0005171265007140532 -2.6226917958961363e-05 0.0082964837317655 0.07607923469006478 -0.14644809643171397
ADVB:  (21105,) (35217,) 0.5992844364937389
ADV2:  0.16886591540445312 0.36837005820400026 0.49728302683701764 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9518   0.3611   1.7157 149.7444 133.1056  53.3563
***** Episode 73430, Mean R = -17.2  Std R = 6.1  Min R = -31.2
PolicyLoss: 1.88
Policy_Entropy: 0.222
Policy_KL: 0.00703
Policy_SD: 0.531
Steps: 1.18e+04
TotalSte

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9705   0.6634   3.3780 149.7444 133.1056  53.3563
***** Episode 73647, Mean R = -15.5  Std R = 3.8  Min R = -24.4
PolicyLoss: 2.1
Policy_Entropy: 0.22
Policy_KL: 0.0121
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 2.71e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00258


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0020   8.4490   2.3854   3.1146
ADVA:  (17443,) (35312,) 0.49396805618486633
ADV1:  0.00012143817466683977 -6.150182342616852e-05 0.007745587265154474 0.0830491524716952 -0.06361204825345607
ADVB:  (21451,) (35312,) 0.6074705482555505
ADV2:  0.2086408070405501 0.43716727684392637 0.5676939611033672 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.7662   1.2223   5.0342 149.7444 133.1056  53.3563
***** Episode 73678, Mean R = -15.4  Std R = 4.5  Min R = -26.4
PolicyLoss: 2.21
Policy_Entropy: 0.22
Policy_KL: 0.00916
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps

ADVA:  (21301,) (35114,) 0.6066241385202483
ADV1:  0.0008892694786028161 -0.0003492669485255208 0.010800238084772401 0.06684808392645814 -0.2337715800309088
ADVB:  (20818,) (35114,) 0.5928689411630689
ADV2:  0.1485226553007909 0.31754507896575485 0.42227006627145375 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.0160   1.3501   5.7621 149.7444 133.1056  53.3563
***** Episode 73895, Mean R = -19.2  Std R = 8.1  Min R = -44.6
PolicyLoss: 1.65
Policy_Entropy: 0.22
Policy_KL: 0.00754
Policy_SD: 0.542
Steps: 1.16e+04
TotalSteps: 2.72e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.976
VF_0_Loss : 0.00255


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0019   0.0067   8.4490   2.3854   3.1146
ADVA:  (21050,) (35074,) 0.6001596624280093
ADV1:  0.00042310072474471307 -0.00034085094328605627 0.010333227799694266 0.11274327904388093 -0.2337715800309088
ADVB:  (19419,) (35074,) 0.5536579802702857
ADV2:  0.09103112290232684 0.3111124634386221 0.444835453857168

optical_flow |  0.0001  0.0002 |  0.0200  0.0186 | -1.3081 -1.1070 |  0.9075  1.0570
v_err    | -0.0110 |  0.0602 | -0.4528 |  0.1243
landing_rewards |    9.65 |    1.85 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.04
tracking_rewards |  -21.81 |    5.61 |  -51.36 |  -13.16
steps    |     378 |      20 |     339 |     422
***** Episode 74143, Mean R = -19.3  Std R = 6.6  Min R = -39.2
PolicyLoss: 1.6
Policy_Entropy: 0.22
Policy_KL: 0.00825
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 2.73e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00242


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0017   0.0062   8.4490   2.3854   3.1146
ADVA:  (23113,) (35030,) 0.6598058806737083
ADV1:  -0.0003175146907534246 -0.0017672107746020357 0.008408883181304979 0.1675370895333887 -0.07293118820526323
ADVB:  (11738,) (35030,) 0.33508421353125895
ADV2:  0.0 0.16044755766307273 0.36430740170961856 3.0 0.0
Policy  Gradients: u/sd/Max/C Ma

seeker_angles |    0.00    0.00 |    0.09    0.08 |   -1.00   -0.95 |    0.99    1.00
cs_angles |  0.0015  0.0022 |  0.0869  0.0802 | -0.9981 -0.9481 |  0.9943  0.9993
optical_flow |  0.0000  0.0001 |  0.0208  0.0192 | -0.9660 -0.9626 |  1.3997  1.0964
v_err    | -0.0111 |  0.0599 | -0.4520 |  0.0971
landing_rewards |    9.26 |    2.62 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.08 |    0.03
tracking_rewards |  -21.91 |    5.57 |  -48.28 |  -13.72
steps    |     377 |      20 |     332 |     415
***** Episode 74453, Mean R = -16.8  Std R = 5.6  Min R = -33.4
PolicyLoss: 1.75
Policy_Entropy: 0.221
Policy_KL: 0.00876
Policy_SD: 0.546
Steps: 1.16e+04
TotalSteps: 2.74e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.00237


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0011   0.0039   8.4490   2.3854   3.1146
ADVA:  (20637,) (35252,) 0.5854135935549756
ADV1:  0.0005742343046239475 -0.00042014359968428193 0.009598549508629352 0.0976

attitude |   -0.02   -0.06    0.05 |    1.17    0.70    1.80 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |   -0.06    0.08 |    0.70    1.80 |   -1.48   -3.13 |    1.49    3.14
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.04 |    0.04    0.02    0.02
w_rewards |   -0.18 |    0.14 |   -0.75 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.30 |    0.00 |    2.86
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.97   -0.99 |    1.00    1.00
cs_angles |  0.0049  0.0009 |  0.0829  0.0815 | -0.9749 -0.9891 |  0.9986  0.9986
optical_flow |  0.0002  0.0001 |  0.0223  0.0202 | -1.1339 -1.1312 |  1.1703  1.0783
v_err    | -0.0109 |  0.0597 | -0.4527 |  0.1577
landing_rewards |    9.39 |    2.40 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  37.0088   7.4767  54.2371 149.7444 133.1056  53.3563
Update Cnt = 2420    ET =    789.3   Stats:  Mean, Std, Min, Max
r_f      |    5.87    1.98   -8.73 |  178.12  173.09  212.03 | -395.85 -367.04 -392.14 |  390.59  379.21  376.64
v_f      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.09    0.09
r_i      |   32.12  -12.24    5.96 |  641.97  644.22  823.12 |-1280.33-1312.00-1335.15 | 1336.43 1292.04 1306.85
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.10    0.09
norm_rf  |    0.15 |    0.07 |    0.02 |    0.40
norm_vf  |    0.07 |    0.01 |    0.03 |    0.11
gs_f     |    1.60 |    2.15 |    0.01 |   16.89
thrust   |    0.01    0.00   -0.00 |    0.66    0.67    0.67 |   -3.43   -3.34   -3.46 |    3.43    3.45    3.45
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.54 |    0.17 |    1.15 |    2.17
rewards  |  -16.65 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   8.8740   4.1048  17.1837 149.7444 133.1056  53.3563
***** Episode 75321, Mean R = -18.5  Std R = 8.1  Min R = -44.5
PolicyLoss: 2.24
Policy_Entropy: 0.221
Policy_KL: 0.00842
Policy_SD: 0.54
Steps: 1.18e+04
TotalSteps: 2.78e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.0032


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0021   0.0077   8.4490   2.3854   3.1146
ADVA:  (20144,) (35337,) 0.5700540509947081
ADV1:  0.0 -0.0007906114140739852 0.008525216811218749 0.11048769585149748 -0.18069627834498392
ADVB:  (17623,) (35337,) 0.4987123977700427
ADV2:  0.0 0.3226662765242747 0.49443978231392316 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.8296   1.1687   4.6015 149.7444 133.1056  53.3563
***** Episode 75352, Mean R = -16.7  Std R = 4.9  Min R = -26.1
PolicyLoss: 1.97
Policy_Entropy: 0.22
Policy_KL: 0.00596
Policy_SD: 0.542
Steps: 1.19e+04
TotalSteps: 2.78e+07
VF_0_ExplainedVarNew: 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.1342   0.9038   3.9966 149.7444 133.1056  53.3563
***** Episode 75569, Mean R = -17.6  Std R = 6.3  Min R = -33.3
PolicyLoss: 2.01
Policy_Entropy: 0.221
Policy_KL: 0.00764
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 2.78e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00277


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0016   8.4490   2.3854   3.1146
ADVA:  (20107,) (35533,) 0.5658683477330931
ADV1:  0.00035873870751185893 -0.00018393436849542557 0.007284793220635357 0.0552139045199177 -0.06386966386158086
ADVB:  (20266,) (35533,) 0.5703430613795627
ADV2:  0.11665947766956657 0.3295750291411451 0.45213655925439916 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0221   0.3494   1.8385 149.7444 133.1056  53.3563
***** Episode 75600, Mean R = -17.1  Std R = 4.2  Min R = -26.8
PolicyLoss: 1.75
Policy_Entropy: 0.221
Policy_KL: 0.00567
Policy_SD: 0.544
Steps: 1.19e+04
Total

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   9.4184   4.5646  20.3991 149.7444 133.1056  53.3563
***** Episode 75817, Mean R = -17.8  Std R = 7.1  Min R = -32.2
PolicyLoss: 1.6
Policy_Entropy: 0.221
Policy_KL: 0.00899
Policy_SD: 0.543
Steps: 1.17e+04
TotalSteps: 2.79e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00238


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0047   0.0028   0.0104   8.4490   2.3854   3.1146
ADVA:  (20776,) (35080,) 0.5922462941847206
ADV1:  0.000686393997791801 6.951593237831742e-05 0.009832573493080688 0.046448441539698515 -0.1076740666221152
ADVB:  (18933,) (35080,) 0.539709236031927
ADV2:  0.06668923578886435 0.3361738430199293 0.48595571300928464 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.0867   1.2837   6.4610 149.7444 133.1056  53.3563
***** Episode 75848, Mean R = -17.3  Std R = 6.3  Min R = -37.3
PolicyLoss: 1.91
Policy_Entropy: 0.221
Policy_KL: 0.0111
Policy_SD: 0.543
Steps: 1.16e+04
TotalSteps:

ADVA:  (17451,) (34825,) 0.5011055276381909
ADV1:  0.0007806601045332639 0.0009224817415130097 0.0068620406690211985 0.07722367806340369 -0.06949758531980565
ADVB:  (21273,) (34825,) 0.6108542713567839
ADV2:  0.2548108633576643 0.5043176505238135 0.6397893476924383 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.1346   1.3247   5.7126 149.7444 133.1056  53.3563
***** Episode 76065, Mean R = -18.4  Std R = 6.3  Min R = -38.7
PolicyLoss: 2.53
Policy_Entropy: 0.221
Policy_KL: 0.00722
Policy_SD: 0.544
Steps: 1.16e+04
TotalSteps: 2.8e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.0025


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0017   0.0010   0.0036   8.4490   2.3854   3.1146
ADVA:  (19090,) (34739,) 0.5495264688102709
ADV1:  0.0 -0.000436975541548247 0.0071472019761324 0.053413425687896976 -0.06672829392323776
ADVB:  (16983,) (34739,) 0.4888741759981577
ADV2:  0.0 0.33138427949434035 0.5405793636680827 3.0 0.0
Policy  Gradients: u/sd/Max/

seeker_angles |    0.00    0.00 |    0.08    0.09 |   -0.99   -0.96 |    0.97    0.99
cs_angles |  0.0035  0.0044 |  0.0849  0.0865 | -0.9914 -0.9566 |  0.9741  0.9890
optical_flow |  0.0000  0.0002 |  0.0193  0.0195 | -1.1021 -0.8717 |  1.1261  1.1795
v_err    | -0.0116 |  0.0613 | -0.4534 |  0.1215
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -22.36 |    5.78 |  -45.50 |  -13.87
steps    |     376 |      21 |     337 |     420
***** Episode 76313, Mean R = -18.3  Std R = 8.7  Min R = -50.6
PolicyLoss: 1.65
Policy_Entropy: 0.221
Policy_KL: 0.0119
Policy_SD: 0.544
Steps: 1.16e+04
TotalSteps: 2.81e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00286


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0006   0.0025   8.4490   2.3854   3.1146
ADVA:  (21057,) (35262,) 0.5971584141568828
ADV1:  0.00020665321744781258 -7.191617067496024e-05 0.008605791281063051 0.127916

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.05 |    0.07    0.05    0.06
a_f      |   -0.06   -0.00 |    0.65    1.92 |   -1.38   -3.11 |    1.40    3.13
w_f      |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.01    0.02
w_rewards |   -0.20 |    0.15 |   -0.81 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.30 |    0.00 |    2.55
seeker_angles |    0.01    0.00 |    0.08    0.08 |   -0.93   -1.00 |    1.00    0.95
cs_angles |  0.0054  0.0004 |  0.0845  0.0824 | -0.9328 -0.9954 |  0.9962  0.9483
optical_flow | -0.0001  0.0001 |  0.0202  0.0187 | -0.8983 -0.9878 |  1.1516  0.9571
v_err    | -0.0111 |  0.0599 | -0.4531 |  0.1064
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -22.08 |    5.31 |  -50.27 |  -12.84
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5856   0.1921   0.9512 149.7444 133.1056  53.3563
Update Cnt = 2480    ET =    827.8   Stats:  Mean, Std, Min, Max
r_f      |    0.84   -7.04    6.83 |  187.68  168.38  197.86 | -397.51 -393.53 -380.15 |  384.97  362.15  396.44
v_f      |   -0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.12   -0.09   -0.10 |    0.10    0.09    0.10
r_i      |    7.20    6.71   41.21 |  671.15  672.51  770.33 |-1366.58-1296.77-1311.35 | 1340.09 1301.94 1273.88
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.15 |    0.06 |    0.03 |    0.51
norm_vf  |    0.08 |    0.01 |    0.04 |    0.14
gs_f     |    1.63 |    3.59 |    0.01 |   43.86
thrust   |   -0.00   -0.00   -0.00 |    0.66    0.66    0.66 |   -3.43   -3.45   -3.42 |    3.45    3.46    3.45
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.50 |    0.16 |    1.10 |    2.20
rewards  |  -16.64 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.8349   1.2325   5.6815 149.7444 133.1056  53.3563
***** Episode 77181, Mean R = -16.2  Std R = 6.2  Min R = -33.1
PolicyLoss: 2.3
Policy_Entropy: 0.222
Policy_KL: 0.00813
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 2.85e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00232


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0012   0.0043   8.4490   2.3854   3.1146
ADVA:  (19125,) (34941,) 0.5473512492487336
ADV1:  0.0011745846189345766 0.000307234343304963 0.009432232224694184 0.04893299252955824 -0.06975481490921966
ADVB:  (21716,) (34941,) 0.621504822414928
ADV2:  0.21039312522005713 0.4080340617585168 0.5218433872512115 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0069   0.3274   1.6529 149.7444 133.1056  53.3563
***** Episode 77212, Mean R = -16.6  Std R = 6.8  Min R = -35.1
PolicyLoss: 2.01
Policy_Entropy: 0.222
Policy_KL: 0.00692
Policy_SD: 0.535
Steps: 1.15e+04
TotalSteps:

ADVA:  (19439,) (35448,) 0.5483807266982622
ADV1:  0.0008368140301081095 8.946826338449965e-05 0.008950711025193219 0.11507221788718369 -0.11200675071193411
ADVB:  (21522,) (35448,) 0.6071428571428571
ADV2:  0.20774193663839796 0.3948296544939573 0.49301061979415145 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4956   0.5563   2.6267 149.7444 133.1056  53.3563
***** Episode 77429, Mean R = -17.1  Std R = 6.2  Min R = -31.9
PolicyLoss: 1.98
Policy_Entropy: 0.222
Policy_KL: 0.0104
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 2.86e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00256


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0002   0.0009   8.4490   2.3854   3.1146
ADVA:  (19467,) (35371,) 0.5503661191371463
ADV1:  0.0008291201788478159 0.00039490156054636553 0.008319887728608635 0.11507221788718369 -0.11200675071193411
ADVB:  (21102,) (35371,) 0.5965904271861129
ADV2:  0.1725350159520353 0.3771806746076906 0.4978383681983375 3

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0012   0.0051   8.4490   2.3854   3.1146
ADVA:  (20554,) (34738,) 0.5916863377281363
ADV1:  0.0 -0.0005864450883136128 0.008676240380741962 0.06903004729094919 -0.09421855875632823
ADVB:  (17661,) (34738,) 0.5084057804133801
ADV2:  0.013474907542740736 0.30207833620238833 0.4701117042105582 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.2433   1.9750   7.5173 149.7444 133.1056  53.3563
***** Episode 77677, Mean R = -17.5  Std R = 7.3  Min R = -40.1
PolicyLoss: 1.82
Policy_Entropy: 0.22
Policy_KL: 0.012
Policy_SD: 0.554
Steps: 1.16e+04
TotalSteps: 2.86e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00368


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0013   0.0048   8.4490   2.3854   3.1146
ADVA:  (20787,) (34941,) 0.5949171460461922
ADV1:  0.00026370417627729487 -0.00026663932207015537 0.009175944284788946 0.06903004729094919 -0.09546455113670865
ADVB:  (18939,) (3494

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0012   0.0047   8.4490   2.3854   3.1146
ADVA:  (19929,) (35320,) 0.5642412231030578
ADV1:  0.0 -0.0008593525274798201 0.008368207148604534 0.04680903101595818 -0.11123328165835245
ADVB:  (18827,) (35320,) 0.5330407701019253
ADV2:  0.06275603023491591 0.3217935256053703 0.459224251187391 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2904   0.4560   2.2583 149.7444 133.1056  53.3563
***** Episode 77925, Mean R = -16.7  Std R = 5.2  Min R = -31.1
PolicyLoss: 1.85
Policy_Entropy: 0.221
Policy_KL: 0.00689
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 2.87e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00259


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0019   8.4490   2.3854   3.1146
ADVA:  (17822,) (35188,) 0.5064794816414687
ADV1:  9.719204626023811e-05 -9.683445253261299e-06 0.00725283511441538 0.08411952758813224 -0.09285520994296692
ADVB:  (20568,) (35188,)

optical_flow |  0.0000  0.0001 |  0.0195  0.0197 | -1.0366 -1.0571 |  1.1151  1.2157
v_err    | -0.0109 |  0.0601 | -0.4520 |  0.1629
landing_rewards |    9.74 |    1.59 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -21.62 |    5.12 |  -47.47 |  -13.71
steps    |     378 |      21 |     335 |     420
***** Episode 78173, Mean R = -16.6  Std R = 5.3  Min R = -31.4
PolicyLoss: 1.98
Policy_Entropy: 0.221
Policy_KL: 0.0293
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 2.88e+07
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00221


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0010   0.0037   8.4490   2.3854   3.1146
ADVA:  (20981,) (35140,) 0.5970688673875925
ADV1:  0.0 -0.0008877604964680481 0.00729034088288919 0.04101775699814142 -0.07640039054028669
ADVB:  (15807,) (35140,) 0.4498292544109277
ADV2:  0.0 0.2507096506189322 0.443006339357656 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  12.0

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |    0.07   -0.19 |    0.61    1.85 |   -1.42   -3.12 |    1.45    3.12
w_f      |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.02    0.02    0.02
w_rewards |   -0.19 |    0.16 |   -1.03 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.29 |    0.00 |    2.71
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.88   -0.98 |    0.94    1.00
cs_angles |  0.0036  0.0008 |  0.0789  0.0819 | -0.8770 -0.9841 |  0.9361  0.9999
optical_flow | -0.0001  0.0002 |  0.0195  0.0204 | -1.1130 -1.0279 |  0.9467  1.0679
v_err    | -0.0110 |  0.0608 | -0.4524 |  0.1832
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -21.52 |    4.89 |  -40.50 |  -12.48
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.8578   0.6100   3.3388 149.7444 133.1056  53.3563
Update Cnt = 2540    ET =    788.6   Stats:  Mean, Std, Min, Max
r_f      |   11.78   -0.86    4.86 |  186.18  162.44  208.44 | -378.76 -386.08 -372.11 |  395.34  332.66  392.42
v_f      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.09    0.09    0.09
r_i      |   37.73  -16.32   29.40 |  694.72  629.13  788.15 |-1263.38-1324.19-1326.29 | 1313.65 1274.15 1363.67
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.09    0.10
norm_rf  |    0.15 |    0.06 |    0.02 |    0.41
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs_f     |    1.40 |    3.61 |    0.00 |   57.17
thrust   |    0.00    0.00    0.00 |    0.66    0.67    0.66 |   -3.44   -3.31   -3.44 |    3.46    3.34    3.44
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.52 |    0.17 |    1.14 |    2.16
rewards  |  -16.38 

ADVA:  (17571,) (34922,) 0.5031498768684497
ADV1:  0.0014209664174004138 0.0013836129573356712 0.00734564492056303 0.04640664771440761 -0.08958306238874736
ADVB:  (23835,) (34922,) 0.6825210469045301
ADV2:  0.35876296588470824 0.5450078574592013 0.6225183678070759 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.5006   1.7534   6.8405 149.7444 133.1056  53.3563
***** Episode 79041, Mean R = -17.4  Std R = 9.8  Min R = -64.2
PolicyLoss: 2.45
Policy_Entropy: 0.221
Policy_KL: 0.00611
Policy_SD: 0.546
Steps: 1.17e+04
TotalSteps: 2.92e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00288


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0028   8.4490   2.3854   3.1146
ADVA:  (16987,) (35158,) 0.48316172706069743
ADV1:  1.4160371649227926e-06 -1.4704079324420359e-05 0.0070482611050971195 0.05804826755586184 -0.08958306238874736
ADVB:  (20456,) (35158,) 0.5818305933215768
ADV2:  0.15603050630972576 0.40135265752109206 0.550832661851

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0031   8.4490   2.3854   3.1146
ADVA:  (18892,) (35172,) 0.537131809393836
ADV1:  0.0004829758527179085 0.0003448141768212302 0.00822608415912018 0.08050631644353895 -0.128303162266187
ADVB:  (19906,) (35172,) 0.5659615603320823
ADV2:  0.17071197112860434 0.441356836913813 0.5680308294086035 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.0727   3.5105  11.5569 149.7444 133.1056  53.3563
***** Episode 79289, Mean R = -15.7  Std R = 6.3  Min R = -38.4
PolicyLoss: 2.38
Policy_Entropy: 0.222
Policy_KL: 0.0108
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 2.93e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00263


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0016   0.0008   0.0032   8.4490   2.3854   3.1146
ADVA:  (18203,) (35223,) 0.516793004570877
ADV1:  0.001168165585350621 0.0005571909309876426 0.012609928599027747 0.08050631644353895 -0.3129349056973234
ADVB:  (22870,)

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0043   0.0026   0.0092   8.4490   2.3854   3.1146
ADVA:  (20163,) (35331,) 0.5706886303812516
ADV1:  0.0009855370521280465 0.00040492139636357096 0.009127596009577124 0.05630820010282428 -0.07655523987827784
ADVB:  (21179,) (35331,) 0.5994452463841952
ADV2:  0.2009844342728508 0.4123829880437953 0.5218605900075177 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.5783   1.1041   4.6200 149.7444 133.1056  53.3563
***** Episode 79537, Mean R = -15.7  Std R = 5.7  Min R = -30.6
PolicyLoss: 2.11
Policy_Entropy: 0.222
Policy_KL: 0.00877
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 2.93e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00259


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0013   0.0048   8.4490   2.3854   3.1146
ADVA:  (16902,) (35128,) 0.4811546344796174
ADV1:  0.002049539753929042 0.0012801746213902314 0.008427278400914987 0.053922205148200175 -0.07655523987827784
ADVB:  

***** Episode 79754, Mean R = -15.7  Std R = 4.3  Min R = -25.4
PolicyLoss: 2.18
Policy_Entropy: 0.223
Policy_KL: 0.00667
Policy_SD: 0.539
Steps: 1.16e+04
TotalSteps: 2.94e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00261


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0011   0.0041   8.4490   2.3854   3.1146
ADVA:  (19772,) (34979,) 0.5652534377769519
ADV1:  0.0003053229661125253 -0.00024974139979648736 0.008079381861408554 0.04676184304089637 -0.06561939546248163
ADVB:  (18930,) (34979,) 0.5411818519683238
ADV2:  0.08972491349295768 0.35426508433142895 0.48137779422071353 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7459   0.2889   1.3544 149.7444 133.1056  53.3563
***** Episode 79785, Mean R = -16.9  Std R = 5.4  Min R = -33.3
PolicyLoss: 1.99
Policy_Entropy: 0.223
Policy_KL: 0.00808
Policy_SD: 0.535
Steps: 1.19e+04
TotalSteps: 2.94e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00237


ValFun  Gr

seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -0.99   -0.92 |    0.95    0.95
cs_angles |  0.0029 -0.0018 |  0.0796  0.0850 | -0.9948 -0.9169 |  0.9544  0.9509
optical_flow | -0.0000  0.0002 |  0.0210  0.0197 | -1.3268 -0.8711 |  0.9006  1.0565
v_err    | -0.0110 |  0.0604 | -0.4572 |  0.2160
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -21.70 |    4.92 |  -43.85 |  -13.34
steps    |     379 |      21 |     335 |     420
***** Episode 80033, Mean R = -16.4  Std R = 6.5  Min R = -29.3
PolicyLoss: 2.45
Policy_Entropy: 0.222
Policy_KL: 0.00634
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 2.95e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00258


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0012   8.4490   2.3854   3.1146
ADVA:  (17296,) (35298,) 0.4899994333956598
ADV1:  0.0011114706736266057 0.0009247619742144442 0.007489809533619469 0.1104210

theta_cv |    0.24 |    0.30 |    0.00 |    2.80
seeker_angles |   -0.00    0.00 |    0.08    0.08 |   -1.00   -0.99 |    0.99    0.98
cs_angles | -0.0003  0.0039 |  0.0835  0.0815 | -0.9951 -0.9921 |  0.9891  0.9794
optical_flow |  0.0001 -0.0000 |  0.0208  0.0202 | -1.1118 -1.0444 |  1.3538  1.0576
v_err    | -0.0109 |  0.0605 | -0.4569 |  0.1912
landing_rewards |    9.55 |    2.08 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -21.66 |    5.28 |  -41.70 |  -13.34
steps    |     377 |      20 |     334 |     418
***** Episode 80343, Mean R = -16.4  Std R = 5.5  Min R = -26.5
PolicyLoss: 1.9
Policy_Entropy: 0.223
Policy_KL: 0.0149
Policy_SD: 0.538
Steps: 1.16e+04
TotalSteps: 2.97e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00333


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0011   8.4490   2.3854   3.1146
ADVA:  (19317,) (35536,) 0.5435895992796038
ADV1:  0.0 -0.0002487522507467238 

theta_cv |    0.24 |    0.29 |    0.00 |    2.57
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -1.00   -0.99 |    0.99    1.00
cs_angles |  0.0002  0.0014 |  0.0843  0.0779 | -0.9951 -0.9907 |  0.9937  1.0000
optical_flow |  0.0001  0.0000 |  0.0211  0.0213 | -0.9343 -1.1738 |  1.1531  1.0952
v_err    | -0.0108 |  0.0603 | -0.4527 |  0.1931
landing_rewards |    9.52 |    2.15 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -21.54 |    4.46 |  -36.81 |  -12.86
steps    |     377 |      20 |     333 |     419
***** Episode 80653, Mean R = -15.6  Std R = 4.4  Min R = -28.6
PolicyLoss: 1.94
Policy_Entropy: 0.223
Policy_KL: 0.006
Policy_SD: 0.535
Steps: 1.16e+04
TotalSteps: 2.98e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00252


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0015   0.0057   8.4490   2.3854   3.1146
ADVA:  (19730,) (35174,) 0.5609256837436744
ADV1:  0.0 -0.0005844720625825147

a_f      |    0.01   -0.14 |    0.66    1.81 |   -1.51   -3.13 |    1.51    3.11
w_f      |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.02    0.02
w_rewards |   -0.23 |    0.17 |   -1.14 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.91
seeker_angles |    0.00    0.00 |    0.09    0.09 |   -0.96   -1.00 |    0.96    1.00
cs_angles |  0.0013  0.0034 |  0.0861  0.0851 | -0.9589 -0.9965 |  0.9616  0.9975
optical_flow | -0.0000  0.0001 |  0.0200  0.0185 | -1.0009 -0.8177 |  1.0666  1.0874
v_err    | -0.0109 |  0.0605 | -0.4567 |  0.1541
landing_rewards |    9.19 |    2.72 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -22.25 |    5.14 |  -42.05 |  -12.70
steps    |     376 |      20 |     336 |     413
***** Episode 80963, Mean R = -16.1  Std R = 6.9  Min R = -32.7
PolicyLoss: 1.52
Polic

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.9353   3.5124  15.1426 151.8980 133.1056  53.3563
Update Cnt = 2620    ET =    831.9   Stats:  Mean, Std, Min, Max
r_f      |  -13.75   -2.83    1.21 |  180.55  163.67  203.67 | -380.51 -380.65 -375.14 |  389.79  369.66  396.61
v_f      |    0.00    0.00   -0.00 |    0.05    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.12    0.09    0.11
r_i      |  -52.91  -34.95    8.68 |  671.73  619.94  799.55 |-1260.63-1270.20-1310.88 | 1254.67 1283.58 1360.17
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.15 |    0.07 |    0.02 |    0.51
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f     |    1.45 |    2.11 |    0.00 |   23.66
thrust   |   -0.00    0.00    0.00 |    0.65    0.65    0.66 |   -3.31   -3.30   -3.45 |    3.36    3.22    3.46
norm_thrust |    0.88 |    0.72 |    0.00 |    3.46
fuel     |    1.51 |    0.16 |    1.12 |    2.05
rewards  |  -17.14 

ADVA:  (19224,) (35085,) 0.5479264643009834
ADV1:  0.0008771712480748111 0.0001829498053010197 0.007935224412044129 0.08371915528305485 -0.06868626516402782
ADVB:  (21915,) (35085,) 0.6246259085079093
ADV2:  0.24897977626127712 0.41571903404819976 0.4997298243970903 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.3723   0.7563   3.8675 151.8980 133.1056  53.3563
***** Episode 81521, Mean R = -16.5  Std R = 5.3  Min R = -31.3
PolicyLoss: 2.03
Policy_Entropy: 0.223
Policy_KL: 0.0218
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 3.01e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00291


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0032   8.4490   2.3854   3.1146
ADVA:  (20321,) (34792,) 0.5840710508162796
ADV1:  0.001090901753114281 0.00028291767321507547 0.008421957085424469 0.07792717548712558 -0.0675406289845242
ADVB:  (21378,) (3

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0012   0.0049   8.4490   2.3854   3.1146
ADVA:  (19015,) (35273,) 0.5390808833952315
ADV1:  0.0005041640500729559 0.0001901753064182202 0.008744405432184322 0.061705667297695055 -0.12338452613552092
ADVB:  (20511,) (35273,) 0.581492926601083
ADV2:  0.153203926626459 0.41227827975290754 0.5607615276318666 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  12.7661   6.6830  29.5953 151.8980 133.1056  53.3563
***** Episode 81769, Mean R = -16.5  Std R = 6.6  Min R = -42.6
PolicyLoss: 2.16
Policy_Entropy: 0.223
Policy_KL: 0.0138
Policy_SD: 0.528
Steps: 1.17e+04
TotalSteps: 3.02e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00229


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0003   0.0014   8.4490   2.3854   3.1146
ADVA:  (17198,) (35203,) 0.488537908700963
ADV1:  0.0014513525724258623 0.0015838973623398054 0.00683197347892631 0.061705667297695055 -0.0857201790291805
ADVB:  (23

***** Episode 81986, Mean R = -17.1  Std R = 5.5  Min R = -30.7
PolicyLoss: 2.04
Policy_Entropy: 0.224
Policy_KL: 0.00624
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps: 3.03e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00243


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0015   8.4490   2.3854   3.1146
ADVA:  (19794,) (35275,) 0.5611339475549256
ADV1:  0.0008166492432664478 0.000265845370022337 0.007037675885118848 0.038433372537244825 -0.06382857155593391
ADVB:  (21461,) (35275,) 0.6083912119064493
ADV2:  0.1963898218231238 0.3797230933458691 0.48515689613504537 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6380   0.2068   1.1482 151.8980 133.1056  53.3563
***** Episode 82017, Mean R = -16.1  Std R = 6.2  Min R = -31.2
PolicyLoss: 1.89
Policy_Entropy: 0.224
Policy_KL: 0.00472
Policy_SD: 0.531
Steps: 1.16e+04
TotalSteps: 3.03e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00266


ValFun  Gradie

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.5365   1.2245   5.2630 151.8980 133.1056  53.3563
***** Episode 82234, Mean R = -17.3  Std R = 5.4  Min R = -32.6
PolicyLoss: 2.21
Policy_Entropy: 0.224
Policy_KL: 0.00796
Policy_SD: 0.531
Steps: 1.17e+04
TotalSteps: 3.04e+07
VF_0_ExplainedVarNew: 0.997
VF_0_ExplainedVarOld: 0.996
VF_0_Loss : 0.00267


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0020   8.4490   2.3854   3.1146
ADVA:  (19771,) (35449,) 0.5577308245648679
ADV1:  0.0 -0.0005198431179354656 0.007445788163387255 0.06995827330068238 -0.06158001560065146
ADVB:  (17737,) (35449,) 0.5003526192558323
ADV2:  0.0008037646626253378 0.3159823427119678 0.49966813008867267 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5745   0.5492   2.5521 151.8980 133.1056  53.3563
***** Episode 82265, Mean R = -15.1  Std R = 4.1  Min R = -26.9
PolicyLoss: 1.9
Policy_Entropy: 0.224
Policy_KL: 0.0097
Policy_SD: 0.527
Steps: 1.18e+04
TotalSteps: 3.04e+07
VF_

optical_flow |  0.0001  0.0000 |  0.0206  0.0181 | -1.0325 -0.8559 |  1.0868  1.0052
v_err    | -0.0107 |  0.0602 | -0.4540 |  0.1202
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.01
tracking_rewards |  -21.62 |    4.85 |  -40.53 |  -14.03
steps    |     378 |      21 |     337 |     421
***** Episode 82513, Mean R = -17.9  Std R = 6.7  Min R = -36.1
PolicyLoss: 1.55
Policy_Entropy: 0.223
Policy_KL: 0.00955
Policy_SD: 0.528
Steps: 1.17e+04
TotalSteps: 3.05e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00274


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0027   8.4490   2.3854   3.1146
ADVA:  (21364,) (35401,) 0.6034857772379312
ADV1:  0.0010205269179363857 -0.00013760190948767836 0.008707912670673931 0.04937931583234084 -0.0640945746297988
ADVB:  (21042,) (35401,) 0.594389988983362
ADV2:  0.1674405930098623 0.3463090383956732 0.4469234734967064 3.0 0.0
Policy  Gradients: 

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |   -0.02   -0.25 |    0.66    1.78 |   -1.51   -3.13 |    1.44    3.14
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.02    0.02
w_rewards |   -0.23 |    0.17 |   -1.01 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.86
seeker_angles |    0.00    0.01 |    0.09    0.08 |   -0.99   -0.98 |    0.94    0.93
cs_angles |  0.0044  0.0059 |  0.0853  0.0820 | -0.9902 -0.9752 |  0.9371  0.9276
optical_flow | -0.0000 -0.0000 |  0.0196  0.0185 | -0.9077 -0.9273 |  0.9831  0.9024
v_err    | -0.0109 |  0.0607 | -0.4556 |  0.1440
landing_rewards |    9.26 |    2.62 |    0.00 |   10.00
landing_margin |   -0.02 |    0.02 |   -0.06 |    0.03
tracking_rewards |  -21.98 |    5.04 |  -44.50 |  -13.38
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.9812   2.7095  12.5289 151.8980 133.1056  53.3563
Update Cnt = 2680    ET =    836.5   Stats:  Mean, Std, Min, Max
r_f      |    0.58   12.55   -7.03 |  199.31  166.84  196.06 | -395.11 -381.31 -394.53 |  398.12  390.17  383.24
v_f      |    0.00   -0.00    0.00 |    0.05    0.04    0.05 |   -0.09   -0.11   -0.09 |    0.11    0.09    0.09
r_i      |  -16.20    4.88  -45.22 |  713.37  652.26  750.57 |-1378.52-1261.19-1378.42 | 1310.37 1274.12 1256.70
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.03 |    0.40
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.22 |    1.91 |    0.00 |   18.39
thrust   |   -0.00    0.00    0.00 |    0.65    0.66    0.65 |   -3.27   -3.42   -3.19 |    3.45    3.15    3.38
norm_thrust |    0.88 |    0.71 |    0.00 |    3.46
fuel     |    1.50 |    0.16 |    1.15 |    2.61
rewards  |  -16.86 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6748   0.2255   1.3915 151.8980 133.1056  53.3563
***** Episode 83381, Mean R = -16.8  Std R = 3.8  Min R = -26.0
PolicyLoss: 1.76
Policy_Entropy: 0.224
Policy_KL: 0.00846
Policy_SD: 0.526
Steps: 1.19e+04
TotalSteps: 3.08e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00306


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0014   8.4490   2.3854   3.1146
ADVA:  (20059,) (35382,) 0.5669266858854785
ADV1:  0.0007895434277789852 0.00026522086101782023 0.007726236695595504 0.04226908454504058 -0.06015031351673362
ADVB:  (21533,) (35382,) 0.6085862868125035
ADV2:  0.195586208917235 0.38829759218613336 0.4950227186162659 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7558   0.2353   1.2696 151.8980 133.1056  53.3563
***** Episode 83412, Mean R = -16.6  Std R = 5.3  Min R = -30.8
Policy

ADVA:  (19874,) (35028,) 0.5673746716912185
ADV1:  0.0005182762808334136 3.434271802932267e-05 0.007667154512231153 0.05771617912993304 -0.05948516107080404
ADVB:  (20565,) (35028,) 0.5871017471736896
ADV2:  0.15260948447030334 0.3685315987724648 0.5004077270605772 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8997   0.2765   1.3454 151.8980 133.1056  53.3563
***** Episode 83629, Mean R = -17.3  Std R = 5.1  Min R = -32.1
PolicyLoss: 1.9
Policy_Entropy: 0.224
Policy_KL: 0.00543
Policy_SD: 0.527
Steps: 1.17e+04
TotalSteps: 3.09e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00281


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0054   8.4490   2.3854   3.1146
ADVA:  (19160,) (35375,) 0.541625441696113
ADV1:  0.0010390777053352514 0.0005347144226143591 0.007289585302068564 0.06897466518788176 -0.05948516107080404
ADVB:  (22636,) (35375,) 0.639886925795053
ADV2:  0.2738383471160727 0.4511735794182581 0.5364663888753801 3.0 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0031   8.4490   2.3854   3.1146
ADVA:  (21831,) (35058,) 0.62271093616293
ADV1:  0.0004662491918501384 -0.00032056717064794755 0.008950658641025703 0.08030177144646061 -0.08449479110435898
ADVB:  (19039,) (35058,) 0.5430714815448685
ADV2:  0.074711060966902 0.30566817984231687 0.4451863817887463 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6378   0.6520   2.8344 151.8980 133.1056  53.3563
***** Episode 83877, Mean R = -16.4  Std R = 5.8  Min R = -33.6
PolicyLoss: 1.71
Policy_Entropy: 0.224
Policy_KL: 0.0094
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps: 3.1e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00256


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0007   0.0029   8.4490   2.3854   3.1146
ADVA:  (22986,) (34890,) 0.6588134135855546
ADV1:  7.944438502370879e-05 -0.0008068210289651748 0.009400736437144449 0.04809750555054998 -0.10860898089033888
ADVB:  (

***** Episode 84094, Mean R = -16.8  Std R = 6.9  Min R = -41.7
PolicyLoss: 2.19
Policy_Entropy: 0.224
Policy_KL: 0.0136
Policy_SD: 0.532
Steps: 1.16e+04
TotalSteps: 3.11e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00308


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0017   8.4490   2.3854   3.1146
ADVA:  (19064,) (34913,) 0.5460430212241858
ADV1:  0.0015258052572314704 0.0005950115831856387 0.008424009810228483 0.04554164509149122 -0.09868445117458963
ADVB:  (23778,) (34913,) 0.6810643599805231
ADV2:  0.32250850576932294 0.4457831244715225 0.494872961304035 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.0990   1.5711   6.1112 151.8980 133.1056  53.3563
***** Episode 84125, Mean R = -18.4  Std R = 7.1  Min R = -35.1
PolicyLoss: 1.98
Policy_Entropy: 0.224
Policy_KL: 0.00984
Policy_SD: 0.534
Steps: 1.15e+04
TotalSteps: 3.11e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00261


ValFun  Gradient

seeker_angles |    0.00    0.01 |    0.09    0.09 |   -0.88   -0.93 |    1.00    0.98
cs_angles |  0.0045  0.0061 |  0.0856  0.0852 | -0.8797 -0.9296 |  0.9953  0.9812
optical_flow |  0.0000  0.0001 |  0.0188  0.0183 | -0.8553 -0.9912 |  1.1325  0.9208
v_err    | -0.0109 |  0.0607 | -0.4528 |  0.1466
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -22.10 |    5.41 |  -44.71 |  -13.48
steps    |     376 |      21 |     335 |     423
***** Episode 84373, Mean R = -17.8  Std R = 6.5  Min R = -38.6
PolicyLoss: 2.33
Policy_Entropy: 0.224
Policy_KL: 0.00938
Policy_SD: 0.533
Steps: 1.15e+04
TotalSteps: 3.12e+07
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.00192


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0066   0.0037   0.0133   8.4490   2.3854   3.1146
ADVA:  (22412,) (35037,) 0.6396666381254102
ADV1:  0.0 -0.0012234114074332859 0.008940573592708176 0.04471119542410973 -0.06

seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.95   -0.98 |    0.99    1.00
cs_angles |  0.0025  0.0024 |  0.0827  0.0839 | -0.9522 -0.9810 |  0.9930  0.9990
optical_flow |  0.0000  0.0001 |  0.0213  0.0197 | -1.0684 -1.0620 |  1.1535  0.9958
v_err    | -0.0108 |  0.0609 | -0.4517 |  0.1092
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.04
tracking_rewards |  -21.68 |    4.73 |  -38.93 |  -13.40
steps    |     377 |      20 |     334 |     418
***** Episode 84683, Mean R = -15.9  Std R = 5.2  Min R = -26.8
PolicyLoss: 1.76
Policy_Entropy: 0.223
Policy_KL: 0.0099
Policy_SD: 0.535
Steps: 1.16e+04
TotalSteps: 3.13e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00337


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0001   0.0006   8.4490   2.3854   3.1146
ADVA:  (17912,) (35039,) 0.5112018037044436
ADV1:  0.0 -0.0001867259439894646 0.005819742497211998 0.037028259913326145 -0.06

theta_cv |    0.24 |    0.29 |    0.00 |    2.54
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.93 |    0.96    0.89
cs_angles |  0.0021  0.0020 |  0.0821  0.0819 | -0.9867 -0.9339 |  0.9616  0.8859
optical_flow | -0.0000  0.0001 |  0.0186  0.0185 | -1.0966 -1.1223 |  0.9941  0.7780
v_err    | -0.0111 |  0.0610 | -0.4538 |  0.1149
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.07 |    0.02
tracking_rewards |  -21.77 |    5.24 |  -56.39 |  -13.97
steps    |     380 |      22 |     335 |     422
***** Episode 84993, Mean R = -19.3  Std R = 9.3  Min R = -55.5
PolicyLoss: 1.67
Policy_Entropy: 0.224
Policy_KL: 0.00714
Policy_SD: 0.54
Steps: 1.2e+04
TotalSteps: 3.14e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.00249


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0002   0.0007   8.4490   2.3854   3.1146
ADVA:  (20955,) (35380,) 0.5922837761447145
ADV1:  0.0 -0.0007117298176883975

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |    0.05   -0.05 |    0.64    1.82 |   -1.43   -3.13 |    1.49    3.14
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.02    0.02
w_rewards |   -0.20 |    0.15 |   -0.87 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.29 |    0.00 |    2.76
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -1.00   -0.98 |    0.97    0.98
cs_angles |  0.0026 -0.0006 |  0.0842  0.0767 | -0.9972 -0.9773 |  0.9685  0.9774
optical_flow |  0.0000  0.0002 |  0.0203  0.0193 | -1.0743 -0.9834 |  0.9522  0.9686
v_err    | -0.0109 |  0.0610 | -0.4521 |  0.0976
landing_rewards |    9.68 |    1.77 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -21.32 |    4.71 |  -41.72 |  -13.37
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.3590   0.9528   5.1618 151.8980 133.1056  53.3563
Update Cnt = 2760    ET =   1085.2   Stats:  Mean, Std, Min, Max
r_f      |   -3.02  -10.81   -2.86 |  176.02  175.38  202.20 | -390.28 -389.97 -384.27 |  385.68  387.92  380.47
v_f      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.11    0.09
r_i      |  -12.74  -68.72  -32.85 |  651.81  659.95  784.57 |-1376.99-1339.33-1289.58 | 1327.56 1358.66 1356.80
v_i      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.09    0.10    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.39
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.55 |    2.78 |    0.01 |   33.66
thrust   |   -0.00   -0.00   -0.00 |    0.64    0.66    0.65 |   -3.20   -3.35   -3.46 |    3.25    3.44    3.41
norm_thrust |    0.87 |    0.72 |    0.00 |    3.46
fuel     |    1.51 |    0.16 |    1.13 |    2.27
rewards  |  -15.99 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1051   0.4031   2.1284 151.8980 133.1056  53.3563
***** Episode 85861, Mean R = -16.5  Std R = 6.0  Min R = -37.5
PolicyLoss: 1.72
Policy_Entropy: 0.224
Policy_KL: 0.00655
Policy_SD: 0.532
Steps: 1.19e+04
TotalSteps: 3.17e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00268


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0027   0.0015   0.0057   8.4490   2.3854   3.1146
ADVA:  (17920,) (35405,) 0.5061432001129784
ADV1:  0.000735340890058468 0.0006322612211552608 0.006657480751048189 0.03574484808833245 -0.07236911474212353
ADVB:  (22516,) (35405,) 0.6359553735348115
ADV2:  0.2428930665160743 0.46000406773197416 0.5704828791781585 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2247   0.4440   2.0857 151.8980 133.1056  53.3563
***** Episode 85892, Mean R = -16.0  Std R = 4.7  Min R = -24.8
PolicyLoss: 2.19
Policy_Entropy: 0.224
Policy_KL: 0.00662
Policy_SD: 0.531
Steps: 1.18e+04
TotalStep

ADVA:  (21733,) (35302,) 0.6156308424451873
ADV1:  0.0 -0.0008972571800004449 0.008557140341764283 0.06834013253290733 -0.15043183333857046
ADVB:  (16612,) (35302,) 0.47056823975978695
ADV2:  0.0 0.2437301575874203 0.4082225799228919 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8333   0.3205   1.4075 151.8980 133.1056  53.3563
***** Episode 86109, Mean R = -15.0  Std R = 3.2  Min R = -22.2
PolicyLoss: 1.57
Policy_Entropy: 0.223
Policy_KL: 0.00892
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 3.18e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00329


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0023   8.4490   2.3854   3.1146
ADVA:  (21023,) (35274,) 0.5959913817542666
ADV1:  0.0002994334322488027 -3.358894983970942e-05 0.0072196339885295955 0.06834013253290733 -0.05051211432606828
ADVB:  (18276,) (35274,) 0.5181153257356693
ADV2:  0.03419151419851578 0.319765360244333 0.4882333830298555 3.0 0.0
Policy  Gradients: u/sd

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0056   8.4490   2.3854   3.1146
ADVA:  (20505,) (35204,) 0.5824622202022497
ADV1:  0.0 -0.0004905376702309305 0.007677266934281699 0.048327644264568315 -0.06356098722112348
ADVB:  (18193,) (35204,) 0.5167878650153391
ADV2:  0.02831593568814322 0.29027518826562443 0.4422584189951508 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2639   0.5041   2.0532 151.8980 133.1056  53.3563
***** Episode 86357, Mean R = -17.8  Std R = 6.3  Min R = -30.6
PolicyLoss: 1.7
Policy_Entropy: 0.223
Policy_KL: 0.00804
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps: 3.19e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00267


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0018   8.4490   2.3854   3.1146
ADVA:  (22498,) (35161,) 0.6398566593669122
ADV1:  0.00033408511994604574 -0.0005568525975015317 0.00836723251872732 0.048327644264568315 -0.06356098722112348
ADVB:  (18373,) (351

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0034   8.4490   2.3854   3.1146
ADVA:  (20448,) (34967,) 0.5847799353676323
ADV1:  0.0013631651279749808 0.00046185731493378677 0.008969405154549056 0.05694938575987579 -0.09621216703037572
ADVB:  (21158,) (34967,) 0.6050847942345641
ADV2:  0.21587200064664863 0.39795356610361826 0.48737776901494323 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.8801   1.1008   5.4390 151.8980 133.1056  53.3563
***** Episode 86605, Mean R = -17.7  Std R = 6.4  Min R = -38.3
PolicyLoss: 2
Policy_Entropy: 0.224
Policy_KL: 0.00875
Policy_SD: 0.532
Steps: 1.17e+04
TotalSteps: 3.2e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00287


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0014   0.0055   8.4490   2.3854   3.1146
ADVA:  (19782,) (34972,) 0.5656525220176141
ADV1:  0.001186276144549749

seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.97   -0.98 |    0.99    0.99
cs_angles |  0.0014  0.0042 |  0.0810  0.0817 | -0.9720 -0.9777 |  0.9942  0.9933
optical_flow |  0.0000 -0.0000 |  0.0205  0.0216 | -1.1295 -1.3692 |  1.0427  1.1131
v_err    | -0.0109 |  0.0615 | -0.4552 |  0.1589
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -21.61 |    5.02 |  -50.88 |  -14.09
steps    |     379 |      20 |     332 |     416
***** Episode 86853, Mean R = -16.5  Std R = 6.2  Min R = -30.0
PolicyLoss: 1.96
Policy_Entropy: 0.225
Policy_KL: 0.00774
Policy_SD: 0.528
Steps: 1.15e+04
TotalSteps: 3.21e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00306


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0010   0.0036   8.4490   2.3854   3.1146
ADVA:  (21009,) (35114,) 0.5983083670330922
ADV1:  0.0 -0.0015117333322226653 0.008797808060419516 0.06283618869825158 -0.06

thrust   |   -0.00    0.00    0.00 |    0.64    0.67    0.65 |   -3.39   -3.39   -3.24 |    3.17    3.34    3.17
norm_thrust |    0.88 |    0.72 |    0.00 |    3.46
fuel     |    1.51 |    0.16 |    1.11 |    2.12
rewards  |  -16.74 |    5.85 |  -35.06 |   -7.22
fuel_rewards |   -4.34 |    0.46 |   -6.10 |   -3.18
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    2.99 |   13.17 |    0.00 |  222.99
norm_af  |    1.69 |    0.88 |    0.12 |    3.32
norm_wf  |    0.01 |    0.01 |    0.00 |    0.04
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |    0.01    0.00   -0.03 |    1.15    0.65    1.80 |   -3.14   -1.55   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |    0.02   -0.03 |    0.65

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.0729   1.1342   4.6703 151.8980 133.1056  53.3563
***** Episode 87442, Mean R = -15.0  Std R = 5.4  Min R = -29.2
PolicyLoss: 1.72
Policy_Entropy: 0.226
Policy_KL: 0.00619
Policy_SD: 0.532
Steps: 1.16e+04
TotalSteps: 3.23e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00285


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0014   8.4490   2.3854   3.1146
ADVA:  (20318,) (34630,) 0.5867167196072769
ADV1:  0.0016098692315345085 0.0007406732775357458 0.00827672540966131 0.05077066740168934 -0.0546356362804375
ADVB:  (23048,) (34630,) 0.6655501010684378
ADV2:  0.29293504829514627 0.43379667818551665 0.5158434430973228 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7251   0.7695   3.8118 151.8980 133.1056  53.3563
Update Cnt = 2820    ET =   1824.2   Stats:  Mean, Std, Min, Max
r_f      |   12.91  -13.76    1.33 |  192.20  160.71  207.57 | -381.74 -372.62 -389.84 |  397.97  3

ADVA:  (21238,) (35213,) 0.6031295260273195
ADV1:  0.0011771141526423275 0.0003728971137464979 0.007555314945237845 0.04145902891730835 -0.05619076330422813
ADVB:  (21860,) (35213,) 0.6207934569619175
ADV2:  0.20118196141939512 0.38015682621638663 0.48776066322576594 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7035   0.6333   3.3725 151.8980 133.1056  53.3563
***** Episode 87690, Mean R = -17.5  Std R = 4.5  Min R = -29.2
PolicyLoss: 1.84
Policy_Entropy: 0.226
Policy_KL: 0.0106
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps: 3.24e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00258


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0002   0.0012   8.4490   2.3854   3.1146
ADVA:  (20783,) (35206,) 0.5903255126966994
ADV1:  0.0008856615342006761 -2.283210662241835e-05 0.007871594435116307 0.056862983722436033 -0.05619076330422813
ADVB:  (21528,) (35206,) 0.6114866784070897
ADV2:  0.19887602824820622 0.3706762201865267 0.47767195053510

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0018   8.4490   2.3854   3.1146
ADVA:  (20803,) (35477,) 0.5863799081094794
ADV1:  0.000483616385033225 -0.00022471926489783936 0.008236593564315242 0.06529165585395624 -0.05847838944043379
ADVB:  (19513,) (35477,) 0.5500183217295713
ADV2:  0.0961731398061565 0.35170244640932863 0.49002797270296056 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0241   0.3254   1.4818 151.8980 133.1056  53.3563
***** Episode 87938, Mean R = -17.5  Std R = 5.8  Min R = -29.5
PolicyLoss: 1.93
Policy_Entropy: 0.226
Policy_KL: 0.00874
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 3.25e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00251


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0016   8.4490   2.3854   3.1146
ADVA:  (18630,) (35356,) 0.5269261228645774
ADV1:  0.0016444597792589725 0.001128826091107907 0.006951945758507119 0.06529165585395624 -0.06348757025160817
ADVB:

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0012   8.4490   2.3854   3.1146
ADVA:  (21258,) (35191,) 0.6040749055156148
ADV1:  0.0003458051702338845 -0.0001559032946432635 0.00811543999123642 0.046675539644978226 -0.10769639012807636
ADVB:  (18861,) (35191,) 0.5359608990935182
ADV2:  0.05949155190924876 0.31432982891306366 0.4744535196850469 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2244   0.4258   2.0804 151.8980 133.1056  53.3563
***** Episode 88186, Mean R = -15.5  Std R = 4.9  Min R = -30.0
PolicyLoss: 1.77
Policy_Entropy: 0.226
Policy_KL: 0.0064
Policy_SD: 0.541
Steps: 1.18e+04
TotalSteps: 3.26e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00276


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0013   0.0047   8.4490   2.3854   3.1146
ADVA:  (17110,) (35179,) 0.4863697092015123
ADV1:  0.0009618687385670

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0032   0.0020   0.0072   8.4490   2.3854   3.1146
ADVA:  (18818,) (35186,) 0.5348149832319673
ADV1:  0.0 -0.0004588754572273149 0.006345662983428807 0.08704437944349813 -0.07218528197570076
ADVB:  (16680,) (35186,) 0.47405217984425624
ADV2:  0.0 0.32747855264261383 0.5295417423792399 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8499   0.3179   1.5258 151.8980 133.1056  53.3563
***** Episode 88434, Mean R = -15.3  Std R = 4.2  Min R = -22.8
PolicyLoss: 2.07
Policy_Entropy: 0.226
Policy_KL: 0.00911
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 3.27e+07
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.996
VF_0_Loss : 0.00287


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0027   8.4490   2.3854   3.1146
ADVA:  (19204,) (35020,) 0.5483723586521987
ADV1:  0.0 -0.00030233758996245645 0.005716768433578545 0

attitude |    0.01    0.02   -0.06 |    1.12    0.65    1.78 |   -3.14   -1.56   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |    0.01   -0.07 |    0.65    1.78 |   -1.37   -3.11 |    1.47    3.14
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.02    0.01
w_rewards |   -0.21 |    0.17 |   -0.89 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.80
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.95 |    0.99    0.97
cs_angles |  0.0004  0.0009 |  0.0839  0.0786 | -0.9879 -0.9542 |  0.9908  0.9655
optical_flow | -0.0001  0.0001 |  0.0207  0.0191 | -1.1389 -1.1176 |  0.9718  0.9441
v_err    | -0.0105 |  0.0612 | -0.4529 |  0.1065
landing_rewards |    9.65 |    1.85 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7329   0.7556   3.4773 151.8980 133.1056  53.3563
Update Cnt = 2870    ET =   1493.5   Stats:  Mean, Std, Min, Max
r_f      |   -3.06    7.76   -3.15 |  190.18  159.26  204.38 | -393.13 -365.51 -383.36 |  382.85  374.91  369.59
v_f      |    0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.10   -0.11   -0.10 |    0.10    0.10    0.11
r_i      |   -5.85   48.06  -10.06 |  713.19  610.23  767.25 |-1241.07-1299.61-1346.06 | 1319.88 1216.23 1319.02
v_i      |   -0.00   -0.00   -0.00 |    0.05    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.02 |    0.32
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.21 |    1.50 |    0.01 |   12.11
thrust   |    0.00   -0.00   -0.00 |    0.66    0.68    0.67 |   -3.23   -3.46   -3.39 |    3.38    3.39    3.45
norm_thrust |    0.91 |    0.72 |    0.00 |    3.46
fuel     |    1.53 |    0.16 |    1.13 |    2.05
rewards  |  -16.74 

ADVA:  (18373,) (34454,) 0.533261740291403
ADV1:  0.0019433748826074501 0.001207086695538573 0.008740467843484024 0.056627248641529015 -0.07215048674121627
ADVB:  (24101,) (34454,) 0.6995123933360422
ADV2:  0.3742448896799791 0.5048901205683453 0.549274104921069 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.4842   1.0162   5.1417 151.8980 133.1056  53.3563
***** Episode 89271, Mean R = -15.2  Std R = 4.4  Min R = -25.6
PolicyLoss: 2.17
Policy_Entropy: 0.227
Policy_KL: 0.00621
Policy_SD: 0.533
Steps: 1.14e+04
TotalSteps: 3.3e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.0034


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0004   0.0015   8.4490   2.3854   3.1146
ADVA:  (20047,) (34855,) 0.5751542102998135
ADV1:  0.0006348343957677345 -0.00010651914696305871 0.00860361389609284 0.056627248641529015 -0.05834811775243777
ADVB:  (20267,) (34855,) 0.581466073734041
ADV2:  0.140964764533184 0.33829584618072 0.46941986083520487 3.0 0.0


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0003   0.0016   8.4490   2.3854   3.1146
ADVA:  (21034,) (34814,) 0.6041822255414488
ADV1:  0.0004997208990523833 -0.00024986460017660295 0.00932722086606881 0.08072539805622186 -0.13258793660631174
ADVB:  (19063,) (34814,) 0.5475670707186764
ADV2:  0.08355146878617017 0.3229474680439121 0.4600043101502478 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  21.0105   7.3756  35.6685 151.8980 133.1056  53.3563
***** Episode 89519, Mean R = -17.0  Std R = 6.2  Min R = -29.6
PolicyLoss: 1.78
Policy_Entropy: 0.226
Policy_KL: 0.0265
Policy_SD: 0.538
Steps: 1.16e+04
TotalSteps: 3.31e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00312


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0018   0.0065   8.4490   2.3854   3.1146
ADVA:  (19727,) (35187,) 0.5606331883934408
ADV1:  0.0 -0.000523953059

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0021   0.0074   8.4490   2.3854   3.1146
ADVA:  (19141,) (35328,) 0.5418081974637681
ADV1:  0.0017853257186561068 0.0007371652040156361 0.008335525467568635 0.045267022334267826 -0.060901332906298566
ADVB:  (25009,) (35328,) 0.7079087409420289
ADV2:  0.37170714964610463 0.4701880400112174 0.49431502762659546 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0580   0.3352   1.7322 151.8980 133.1056  53.3563
***** Episode 89767, Mean R = -15.4  Std R = 4.0  Min R = -23.2
PolicyLoss: 2.01
Policy_Entropy: 0.226
Policy_KL: 0.00715
Policy_SD: 0.54
Steps: 1.18e+04
TotalSteps: 3.32e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00296


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0037   0.0023   0.0086   8.4490   2.3854   3.1146
ADVA:  (19822,) (35251,) 0.5623102890698136
ADV1:  0.001340648733209

***** Episode 89984, Mean R = -15.8  Std R = 5.3  Min R = -26.3
PolicyLoss: 2.13
Policy_Entropy: 0.227
Policy_KL: 0.0067
Policy_SD: 0.541
Steps: 1.19e+04
TotalSteps: 3.33e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00243


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0013   8.4490   2.3854   3.1146
ADVA:  (20672,) (35323,) 0.5852277552869235
ADV1:  0.00010762059250230341 -0.0003612926407558656 0.007581358652104316 0.032341028064902355 -0.05784708259814222
ADVB:  (19032,) (35323,) 0.5387990827506157
ADV2:  0.069584095836937 0.31811270247657797 0.4596241769928329 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9922   0.3526   1.7955 151.8980 133.1056  53.3563
***** Episode 90015, Mean R = -15.2  Std R = 5.1  Min R = -28.1
PolicyLoss: 1.78
Policy_Entropy: 0.227
Policy_KL: 0.00668
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 3.33e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00276


Dynamics: Ma

cs_angles |  0.0002  0.0000 |  0.0804  0.0806 | -0.9856 -0.9967 |  0.9869  0.9826
optical_flow |  0.0000  0.0000 |  0.0192  0.0195 | -1.0826 -1.0061 |  1.0212  0.9934
v_err    | -0.0103 |  0.0610 | -0.4549 |  0.1160
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -21.20 |    5.22 |  -43.71 |  -12.80
steps    |     379 |      21 |     338 |     419
***** Episode 90263, Mean R = -16.6  Std R = 5.4  Min R = -31.0
PolicyLoss: 1.96
Policy_Entropy: 0.226
Policy_KL: 0.0126
Policy_SD: 0.534
Steps: 1.19e+04
TotalSteps: 3.34e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00195


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0011   0.0040   8.4490   2.3854   3.1146
ADVA:  (20058,) (35773,) 0.5607022055740363
ADV1:  0.00041284728577250206 -0.0004612517337580914 0.008592794344782571 

attitude |    0.13   -0.01    0.17 |    1.17    0.63    1.86 |   -3.14   -1.55   -3.14 |    3.14    1.55    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |   -0.01    0.10 |    0.64    1.87 |   -1.38   -3.13 |    1.53    3.13
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.02    0.01
w_rewards |   -0.19 |    0.15 |   -0.90 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.31 |    0.00 |    2.80
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -0.94   -0.99 |    0.99    0.99
cs_angles |  0.0021 -0.0007 |  0.0849  0.0795 | -0.9351 -0.9900 |  0.9929  0.9896
optical_flow | -0.0001  0.0000 |  0.0215  0.0199 | -0.9059 -1.0048 |  1.0050  1.1812
v_err    | -0.0102 |  0.0605 | -0.4526 |  0.1215
landing_rewards |    9.35 |    2.46 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  14.4015   9.0108  36.9896 151.8980 133.1056  53.3563
Update Cnt = 2930    ET =   1383.3   Stats:  Mean, Std, Min, Max
r_f      |    9.30  -15.37   -0.70 |  182.80  159.00  214.20 | -395.44 -385.93 -397.98 |  395.46  355.88  398.14
v_f      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.10    0.12    0.11
r_i      |   30.79  -52.47    5.40 |  659.88  652.81  790.71 |-1364.81-1334.36-1361.53 | 1293.42 1300.02 1361.47
v_i      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.10    0.10    0.10
norm_rf  |    0.15 |    0.06 |    0.03 |    0.36
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.68 |    4.79 |    0.01 |   72.53
thrust   |   -0.00   -0.01   -0.00 |    0.65    0.67    0.66 |   -3.45   -3.41   -3.44 |    3.45    3.25    3.44
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.51 |    0.16 |    1.13 |    2.03
rewards  |  -15.63 

ADVA:  (21235,) (35120,) 0.6046412300683371
ADV1:  0.0006975700173097003 -0.00019657633416744828 0.008239542010602804 0.04970714887358996 -0.05997138675179696
ADVB:  (20744,) (35120,) 0.5906605922551252
ADV2:  0.1562873452521235 0.33725133767845006 0.43568399762791754 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1232   0.4284   2.0646 151.8980 133.1056  53.3563
***** Episode 91131, Mean R = -15.6  Std R = 4.5  Min R = -25.6
PolicyLoss: 1.71
Policy_Entropy: 0.228
Policy_KL: 0.0103
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 3.37e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00314


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0006   0.0021   8.4490   2.3854   3.1146
ADVA:  (20483,) (35037,) 0.5846105545566116
ADV1:  0.0008575123920970193 9.093065697878841e-05 0.007945530699877764 0.04135692367679433 -0.06110171229588865
ADVB:  (21377,) (35037,) 0.6101264377657905
ADV2:  0.19383111530471278 0.3762379774130025 0.478864548223401

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0025   0.0015   0.0059   8.4490   2.3854   3.1146
ADVA:  (18059,) (34849,) 0.5182071221555855
ADV1:  0.0004239316350587753 0.0004286793785533481 0.00961879391574842 0.13185284116070678 -0.21250103947651378
ADVB:  (21193,) (34849,) 0.6081379666561451
ADV2:  0.1729216967302517 0.3883083232080366 0.5285903784861565 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.8939   0.9902   4.2607 151.8980 133.1056  53.3563
***** Episode 91379, Mean R = -15.7  Std R = 4.8  Min R = -32.9
PolicyLoss: 1.92
Policy_Entropy: 0.227
Policy_KL: 0.00846
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 3.38e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00272


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0032   8.4490   2.3854   3.1146
ADVA:  (20080,) (35270,) 0.5693223702863623
ADV1:  0.0001959716513284266 0.00017759221158597638 0.008103397278689207 0.09843486244058922 -0.05776035285020824
ADVB:  

***** Episode 91596, Mean R = -16.7  Std R = 6.8  Min R = -34.2
PolicyLoss: 1.6
Policy_Entropy: 0.227
Policy_KL: 0.00796
Policy_SD: 0.532
Steps: 1.18e+04
TotalSteps: 3.39e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00355


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0016   8.4490   2.3854   3.1146
ADVA:  (21991,) (35287,) 0.6232040128092499
ADV1:  0.0 -0.0009202776139093859 0.009067941479118649 0.07039986204864296 -0.1693733794474403
ADVB:  (17677,) (35287,) 0.5009493581205543
ADV2:  0.0014359307845558144 0.24953339124741028 0.4065678679235265 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1856   0.3737   1.7812 151.8980 133.1056  53.3563
***** Episode 91627, Mean R = -16.1  Std R = 6.5  Min R = -32.8
PolicyLoss: 1.5
Policy_Entropy: 0.227
Policy_KL: 0.008
Policy_SD: 0.534
Steps: 1.16e+04
TotalSteps: 3.39e+07
VF_0_ExplainedVa

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.5012   2.0937   8.0134 151.8980 133.1056  53.3563
***** Episode 91844, Mean R = -17.4  Std R = 5.5  Min R = -31.4
PolicyLoss: 1.81
Policy_Entropy: 0.227
Policy_KL: 0.0142
Policy_SD: 0.541
Steps: 1.18e+04
TotalSteps: 3.4e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.00307


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0046   0.0027   0.0101   8.4490   2.3854   3.1146
ADVA:  (19774,) (35571,) 0.5559022799471479
ADV1:  0.0 -0.00097763550663576 0.007646711121241999 0.04470497859093475 -0.06251351233531544
ADVB:  (17424,) (35571,) 0.4898372269545416
ADV2:  0.0 0.2774407822646134 0.45201708593053586 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1409   0.3687   1.8266 151.8980 133.1056  53.3563
***** Episode 91875, Mean R = -17.9  Std R = 6.4  Min R = -33.5
PolicyLoss: 1.7
Policy_Entropy: 0.226
Policy_KL: 0.0115
Policy_SD: 0.542
Steps: 1.19e+04
TotalSteps: 3.4e+07
VF_0_ExplainedVarNew: 0.98

seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.97 |    0.98    0.99
cs_angles |  0.0038  0.0015 |  0.0818  0.0836 | -0.9877 -0.9700 |  0.9832  0.9851
optical_flow | -0.0000  0.0001 |  0.0198  0.0196 | -1.0601 -1.0888 |  1.1351  1.1291
v_err    | -0.0104 |  0.0610 | -0.4538 |  0.2850
landing_rewards |    9.74 |    1.59 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.01
tracking_rewards |  -21.89 |    5.55 |  -47.37 |  -13.06
steps    |     378 |      20 |     334 |     418
***** Episode 92123, Mean R = -16.0  Std R = 5.9  Min R = -35.9
PolicyLoss: 2.24
Policy_Entropy: 0.226
Policy_KL: 0.0147
Policy_SD: 0.543
Steps: 1.18e+04
TotalSteps: 3.41e+07
VF_0_ExplainedVarNew: 0.997
VF_0_ExplainedVarOld: 0.996
VF_0_Loss : 0.00279


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0026   0.0015   0.0055   8.4490   2.3854   3.1146
ADVA:  (20758,) (35221,) 0.58936

attitude |   -0.00    0.07    0.10 |    1.22    0.63    1.88 |   -3.14   -1.54   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.08   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |    0.07    0.06 |    0.63    1.88 |   -1.35   -3.14 |    1.50    3.13
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.04    0.02    0.02
w_rewards |   -0.22 |    0.19 |   -1.53 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.74
seeker_angles |    0.00   -0.00 |    0.08    0.08 |   -0.98   -1.00 |    0.99    1.00
cs_angles |  0.0025 -0.0007 |  0.0809  0.0782 | -0.9828 -0.9994 |  0.9924  0.9979
optical_flow |  0.0001  0.0001 |  0.0187  0.0189 | -1.1517 -0.9524 |  1.0441  1.0479
v_err    | -0.0104 |  0.0606 | -0.4525 |  0.1607
landing_rewards |    9.26 |    2.62 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  51.8802  18.0869  90.3029 151.8980 133.1056  53.3563
Update Cnt = 2990    ET =   1580.5   Stats:  Mean, Std, Min, Max
r_f      |  -16.52    3.72  -18.77 |  178.91  163.33  207.00 | -381.10 -377.00 -398.95 |  395.19  393.39  391.41
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.08   -0.11 |    0.10    0.10    0.09
r_i      |  -10.47   16.45  -86.50 |  669.83  624.52  795.86 |-1324.51-1327.67-1322.01 | 1392.27 1289.20 1286.32
v_i      |    0.00   -0.00    0.01 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.08    0.09
norm_rf  |    0.14 |    0.06 |    0.01 |    0.45
norm_vf  |    0.08 |    0.01 |    0.05 |    0.13
gs_f     |    1.39 |    1.79 |    0.01 |   16.23
thrust   |    0.00    0.01   -0.00 |    0.66    0.67    0.68 |   -3.39   -3.45   -3.46 |    3.37    3.46    3.46
norm_thrust |    0.91 |    0.72 |    0.00 |    3.46
fuel     |    1.54 |    0.19 |    1.10 |    2.28
rewards  |  -16.54 

ADVA:  (21430,) (35117,) 0.6102457499216903
ADV1:  2.1611412759641943e-05 -0.0010976187383276742 0.009370850977222216 0.07767521038647957 -0.09768189990502812
ADVB:  (19020,) (35117,) 0.5416180197625082
ADV2:  0.06950847854213295 0.27990182639856603 0.4031989587269379 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7685   0.2463   1.1602 151.8980 133.1056  53.3563
***** Episode 92991, Mean R = -17.3  Std R = 7.0  Min R = -36.6
PolicyLoss: 1.55
Policy_Entropy: 0.226
Policy_KL: 0.00596
Policy_SD: 0.545
Steps: 1.18e+04
TotalSteps: 3.44e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.977
VF_0_Loss : 0.00306


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0030   0.0017   0.0066   8.4490   2.3854   3.1146
ADVA:  (20501,) (35257,) 0.5814731826303996
ADV1:  5.574961989542222e-06 -0.0008357130099962985 0.009433267123738815 0.06307024269165773 -0.09768189990502812
ADVB:  (19426,) (35257,) 0.5509827835607114
ADV2:  0.0828077708566452 0.30738068702831295 0.4459925129148

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0018   8.4490   2.3854   3.1146
ADVA:  (19340,) (34810,) 0.555587474863545
ADV1:  0.0008913256807056239 8.608408957031433e-05 0.009116971155548569 0.05477570853271674 -0.0984645444455804
ADVB:  (21512,) (34810,) 0.6179833381212295
ADV2:  0.20328974976847541 0.39375484156008467 0.508450698518743 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9008   0.2812   1.4487 151.8980 133.1056  53.3563
***** Episode 93239, Mean R = -16.0  Std R = 5.6  Min R = -35.3
PolicyLoss: 1.91
Policy_Entropy: 0.227
Policy_KL: 0.00669
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 3.45e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00273


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0033   0.0019   0.0069   8.4490   2.3854   3.1146
ADVA:  (18725,) (34802,) 0.5380437905867479
ADV1:  0.002039793579555451 0.0018345851944991964 0.008070092665183243 0.05477570853271674 -0.0984645444455804
ADVB:  (23

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0030   8.4490   2.3854   3.1146
ADVA:  (21778,) (35219,) 0.6183594082739431
ADV1:  0.001307523180457383 0.0007416830563166679 0.007872657192186926 0.06517727056401323 -0.11852291593407249
ADVB:  (21111,) (35219,) 0.599420767199523
ADV2:  0.173681964255765 0.3703452766267122 0.4856121174724051 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.1673   3.0704  11.2134 151.8980 133.1056  53.3563
***** Episode 93487, Mean R = -15.2  Std R = 3.8  Min R = -23.3
PolicyLoss: 1.85
Policy_Entropy: 0.228
Policy_KL: 0.00845
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps: 3.46e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00261


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0036   8.4490   2.3854   3.1146
ADVA:  (21486,) (35260,) 0.6093590470788429
ADV1:  0.0008559435680984065 0

***** Episode 93704, Mean R = -14.9  Std R = 5.1  Min R = -28.3
PolicyLoss: 1.87
Policy_Entropy: 0.227
Policy_KL: 0.00978
Policy_SD: 0.539
Steps: 1.16e+04
TotalSteps: 3.47e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00261


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0018   8.4490   2.3854   3.1146
ADVA:  (19408,) (34732,) 0.5587930438788438
ADV1:  0.0005363631170335849 0.000160421812577014 0.008360608018912634 0.0930187861217886 -0.10986587697458272
ADVB:  (20423,) (34732,) 0.5880168144650466
ADV2:  0.14175996068168312 0.35564928601183543 0.49638011542294475 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8754   0.3511   1.6681 151.8980 133.1056  53.3563
***** Episode 93735, Mean R = -17.2  Std R = 7.3  Min R = -36.2
PolicyLoss: 1.81
Policy_Entropy: 0.228
Policy_KL: 0.00795
Policy_SD: 0.537
Steps: 1.15e+04
TotalSteps: 3.47e+

seeker_angles |   -0.00    0.00 |    0.08    0.08 |   -0.97   -1.00 |    0.99    0.94
cs_angles | -0.0011  0.0032 |  0.0809  0.0811 | -0.9666 -0.9993 |  0.9920  0.9385
optical_flow |  0.0000  0.0000 |  0.0192  0.0198 | -1.0340 -1.1011 |  0.9829  1.0984
v_err    | -0.0108 |  0.0608 | -0.4562 |  0.1150
landing_rewards |    9.26 |    2.62 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.03
tracking_rewards |  -21.13 |    5.02 |  -40.41 |  -13.07
steps    |     376 |      21 |     327 |     417
***** Episode 93983, Mean R = -17.5  Std R = 7.1  Min R = -33.6
PolicyLoss: 2.86
Policy_Entropy: 0.226
Policy_KL: 0.0122
Policy_SD: 0.55
Steps: 1.17e+04
TotalSteps: 3.48e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00275


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0019   8.4490   2.3854   3.1146
ADVA:  (16893,) (35079,) 0.481570

attitude |   -0.06    0.02   -0.01 |    1.22    0.69    1.83 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |    0.03   -0.03 |    0.69    1.83 |   -1.48   -3.12 |    1.52    3.14
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.02    0.02
w_rewards |   -0.22 |    0.16 |   -0.87 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.31 |    0.00 |    2.91
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.97 |    0.99    0.91
cs_angles |  0.0015  0.0002 |  0.0799  0.0828 | -0.9896 -0.9665 |  0.9900  0.9063
optical_flow | -0.0001  0.0002 |  0.0193  0.0184 | -0.8656 -0.9541 |  1.0490  0.9207
v_err    | -0.0107 |  0.0608 | -0.4587 |  0.1328
landing_rewards |    9.29 |    2.57 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6281   0.2462   1.1537 151.8980 133.1056  53.3563
Update Cnt = 3050    ET =   1556.6   Stats:  Mean, Std, Min, Max
r_f      |   -9.06   -1.71   -2.28 |  182.26  164.61  210.73 | -387.67 -346.35 -383.60 |  389.76  380.68  394.70
v_f      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.10    0.10
r_i      |   -9.35    2.98  -14.73 |  681.21  644.60  784.21 |-1319.17-1247.90-1357.88 | 1366.87 1257.48 1266.40
v_i      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.15 |    0.06 |    0.03 |    0.46
norm_vf  |    0.08 |    0.01 |    0.04 |    0.12
gs_f     |    1.38 |    2.12 |    0.01 |   20.54
thrust   |    0.00    0.00    0.00 |    0.67    0.68    0.68 |   -3.33   -3.36   -3.41 |    3.39    3.05    3.32
norm_thrust |    0.92 |    0.72 |    0.00 |    3.46
fuel     |    1.57 |    0.18 |    1.15 |    2.27
rewards  |  -16.33 

ADVA:  (18960,) (34951,) 0.5424737489628337
ADV1:  0.0013119796207186965 0.000467304020596933 0.00848023501905589 0.06362516425280465 -0.09618114307747477
ADVB:  (23853,) (34951,) 0.6824697433549827
ADV2:  0.3073025597664229 0.44455431481847196 0.5047506283399542 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8189   0.2578   1.3781 151.8980 133.1056  53.3563
***** Episode 94851, Mean R = -16.3  Std R = 7.5  Min R = -43.6
PolicyLoss: 1.96
Policy_Entropy: 0.228
Policy_KL: 0.00635
Policy_SD: 0.537
Steps: 1.16e+04
TotalSteps: 3.51e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.0026


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0016   8.4490   2.3854   3.1146
ADVA:  (18136,) (35171,) 0.5156520997412641
ADV1:  0.0006550442736984827 0.00024087627736503977 0.007316724118648408 0.06362516425280465 -0.09618114307747477
ADVB:  (22390,) (35171,) 0.636604020357681
ADV2:  0.2456411886814836 0.43503809237483065 0.5315504807156086 3.0

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0006   8.4490   2.3854   3.1146
ADVA:  (23953,) (35144,) 0.6815672661051673
ADV1:  0.0005387243458314346 -0.0007490570125254025 0.01129298021726186 0.13451927971490263 -0.23465670155075435
ADVB:  (18109,) (35144,) 0.5152799908946051
ADV2:  0.019462040327252135 0.2386302154316703 0.3849246999549369 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0058   0.4240   2.1301 151.8980 133.1056  53.3563
***** Episode 95099, Mean R = -16.0  Std R = 5.3  Min R = -27.8
PolicyLoss: 1.39
Policy_Entropy: 0.227
Policy_KL: 0.00964
Policy_SD: 0.542
Steps: 1.17e+04
TotalSteps: 3.52e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.983
VF_0_Loss : 0.00259


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0011   0.0037   8.4490   2.3854   3.1146
ADVA:  (21481,) (35046,) 0.6129372824288079
ADV1:  0.0018953856151702874 0.0007661304351719956 0.009659103470081904 0.13451927971490263 -0.07701152424201038
ADVB:

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0002   0.0010   8.4490   2.3854   3.1146
ADVA:  (20026,) (35031,) 0.5716650966287002
ADV1:  0.0009557672429781078 0.00023371064753301298 0.008510433962574839 0.07616919962557142 -0.08651351784053962
ADVB:  (22524,) (35031,) 0.6429733664468613
ADV2:  0.22426222917148547 0.384869735838348 0.47637019641776596 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2160   0.4600   2.1705 151.8980 133.1056  53.3563
***** Episode 95347, Mean R = -16.8  Std R = 6.6  Min R = -38.3
PolicyLoss: 1.79
Policy_Entropy: 0.228
Policy_KL: 0.0113
Policy_SD: 0.55
Steps: 1.16e+04
TotalSteps: 3.53e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00343


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0038   0.0022   0.0082   8.4490   2.3854   3.1146
ADVA:  (20409,) (34918,) 0.5844836474024858
ADV1:  0.0 -0.0008298737478

***** Episode 95564, Mean R = -15.4  Std R = 4.7  Min R = -29.7
PolicyLoss: 1.87
Policy_Entropy: 0.229
Policy_KL: 0.0151
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 3.54e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.987
VF_0_Loss : 0.00315


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0004   0.0018   8.4490   2.3854   3.1146
ADVA:  (20587,) (35061,) 0.5871766350075582
ADV1:  0.0013717818350002728 0.0008567154053251718 0.009246233006621992 0.1224679248460572 -0.06793821540231007
ADVB:  (22778,) (35061,) 0.6496677219702803
ADV2:  0.24371857197779487 0.4210163908931537 0.5224684741357402 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7041   0.6736   3.2780 151.8980 133.1056  53.3563
***** Episode 95595, Mean R = -15.5  Std R = 6.1  Min R = -38.1
PolicyLoss: 1.95
Policy_Entropy: 0.228
Policy_KL: 0.00764
Policy_SD: 0.541
Steps: 1.17e+04
TotalSteps: 3.54e+07

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |   -0.01   -0.12 |    0.66    1.84 |   -1.38   -3.13 |    1.45    3.13
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.03   -0.03 |    0.03    0.02    0.02
w_rewards |   -0.22 |    0.14 |   -0.75 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.31 |    0.00 |    2.92
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -1.00   -0.94 |    0.99    0.97
cs_angles |  0.0030  0.0030 |  0.0784  0.0800 | -0.9996 -0.9359 |  0.9917  0.9724
optical_flow |  0.0001  0.0001 |  0.0186  0.0173 | -0.9994 -0.8824 |  1.0465  1.0661
v_err    | -0.0110 |  0.0604 | -0.4622 |  0.1256
landing_rewards |    9.35 |    2.46 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -20.90 |    4.86 |  -48.58 |  -13.90
steps    |     376 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6815   0.2246   1.2079 151.8980 133.1056  53.3563
Update Cnt = 3100    ET =   1580.8   Stats:  Mean, Std, Min, Max
r_f      |  -12.69  -11.84    3.17 |  198.89  168.79  192.32 | -393.49 -387.85 -389.58 |  395.44  369.19  381.61
v_f      |    0.00    0.00   -0.00 |    0.05    0.04    0.05 |   -0.09   -0.12   -0.11 |    0.11    0.10    0.11
r_i      |  -65.32  -17.31    0.95 |  734.85  634.79  738.02 |-1295.97-1264.32-1345.93 | 1325.14 1261.94 1349.76
v_i      |    0.00    0.00    0.00 |    0.05    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.09    0.09    0.10
norm_rf  |    0.14 |    0.06 |    0.01 |    0.34
norm_vf  |    0.08 |    0.01 |    0.05 |    0.13
gs_f     |    1.38 |    3.09 |    0.01 |   44.83
thrust   |    0.01    0.00    0.00 |    0.67    0.67    0.66 |   -3.44   -3.36   -3.44 |    3.44    3.44    3.36
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.53 |    0.17 |    1.10 |    2.29
rewards  |  -16.14 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7311   0.2762   1.4569 151.8980 133.1056  53.3563
***** Episode 96401, Mean R = -17.2  Std R = 4.3  Min R = -30.1
PolicyLoss: 1.68
Policy_Entropy: 0.228
Policy_KL: 0.00928
Policy_SD: 0.544
Steps: 1.19e+04
TotalSteps: 3.57e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00241


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0014   0.0058   8.4490   2.3854   3.1146
ADVA:  (20672,) (35230,) 0.5867726369571388
ADV1:  0.0009564367233199575 0.0002420555111587915 0.008569413440348584 0.08282519693040069 -0.0782445721984425
ADVB:  (21465,) (35230,) 0.6092818620493897
ADV2:  0.19651776087967404 0.3927751585460155 0.5027501558570305 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2136   0.4177   2.1462 151.8980 133.1056  53.3563
***** Episode 96432, Mean R = -16.1  Std R = 5.0  Min R = -28.9
PolicyLoss: 1.93
Policy_Entropy: 0.228
Policy_KL: 0.00864
Policy_SD: 0.54
Steps: 1.18e+04
TotalSteps

ADVA:  (17816,) (34901,) 0.5104724792985874
ADV1:  0.00110686009028221 0.000779249932844536 0.007998677561698414 0.10920921506296255 -0.07564101686639402
ADVB:  (23602,) (34901,) 0.6762556946792355
ADV2:  0.33718907876669774 0.49464303300089557 0.5605621035689572 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.9107   1.2703   5.1592 151.8980 133.1056  53.3563
***** Episode 96649, Mean R = -16.7  Std R = 5.6  Min R = -31.7
PolicyLoss: 2.18
Policy_Entropy: 0.23
Policy_KL: 0.00608
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 3.58e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00312


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0001   0.0007   8.4490   2.3854   3.1146
ADVA:  (20248,) (34946,) 0.579408229840325
ADV1:  0.0004219474004676302 0.00010702007438458152 0.007825889888952286 0.0634038261000674 -0.07939743582422226
ADVB:  (19977,) (34946,) 0.571653408115378
ADV2:  0.12264639013363232 0.3554389057832779 0.48981734726259896 3.0 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0007   8.4490   2.3854   3.1146
ADVA:  (23106,) (34941,) 0.6612861681119602
ADV1:  0.00035681664127972836 -0.0010477039446496832 0.00986122727591207 0.07033787796684904 -0.07553414028670595
ADVB:  (17453,) (34941,) 0.4994991557196417
ADV2:  0.0 0.22670470177015534 0.38256440592669055 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4941   0.6706   3.0194 151.8980 133.1056  53.3563
***** Episode 96897, Mean R = -15.8  Std R = 5.9  Min R = -29.7
PolicyLoss: 1.35
Policy_Entropy: 0.229
Policy_KL: 0.00785
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 3.59e+07
VF_0_ExplainedVarNew: 0.983
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00315


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0002   0.0011   8.4490   2.3854   3.1146
ADVA:  (21190,) (35219,) 0.601663874613135
ADV1:  0.0010942853695134046 0.000255354

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  31.3219  16.2476  66.8531 151.8980 133.1056  53.3563
***** Episode 97114, Mean R = -15.7  Std R = 4.5  Min R = -30.5
PolicyLoss: 2.38
Policy_Entropy: 0.223
Policy_KL: 0.101
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 3.6e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00277


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0108   0.0096   0.0488   8.4490   2.3854   3.1146
ADVA:  (21643,) (35001,) 0.6183537613211051
ADV1:  0.0002467515132064859 -0.0035088561031952166 0.07288961316960055 0.07918090132864902 -2.3150598723916396
ADVB:  (20617,) (35001,) 0.5890403131339105
ADV2:  0.05170614734910843 0.12051603419040198 0.16154726023514496 2.6081471968659318 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.1892   1.6010   7.5743 151.8980 133.1056  53.3563
***** Episode 97145, Mean R = -21.9  Std R = 36.1  Min R = -217.2
PolicyLoss: 0.62
Policy_Entropy: 0.226
Policy_KL: 0.0462
Policy_SD: 0.54
Steps: 1.16e

seeker_angles |    0.00    0.01 |    0.08    0.08 |   -0.96   -0.92 |    0.98    0.97
cs_angles |  0.0029  0.0051 |  0.0815  0.0779 | -0.9599 -0.9170 |  0.9838  0.9659
optical_flow | -0.0000  0.0000 |  0.0200  0.0193 | -1.0731 -1.0592 |  0.9117  1.1765
v_err    | -0.0119 |  0.0644 | -1.1570 |  0.1031
landing_rewards |    9.42 |    2.34 |    0.00 |   10.00
landing_margin |    2.99 |   52.93 |   -0.07 |  933.41
tracking_rewards |  -21.39 |    8.80 | -148.61 |  -12.42
steps    |     378 |      24 |     141 |     420
***** Episode 97393, Mean R = -16.1  Std R = 5.5  Min R = -27.3
PolicyLoss: 1.85
Policy_Entropy: 0.229
Policy_KL: 0.00634
Policy_SD: 0.541
Steps: 1.18e+04
TotalSteps: 3.61e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00362


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0026   8.4490   2.3854   3.1146
ADVA:  (20136,) (35383,) 0.5690868496170477
ADV1:  0.0011258045705856458 0.0005295433174860484 0.011211584211425901 0.3950752

attitude |   -0.04   -0.00    0.20 |    1.24    0.66    1.83 |   -3.14   -1.57   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |   -0.00    0.16 |    0.66    1.85 |   -1.48   -3.12 |    1.34    3.14
w_f      |    0.01   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.02    0.01
w_rewards |   -0.24 |    0.17 |   -0.92 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.86
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.98 |    0.96    0.89
cs_angles |  0.0025  0.0042 |  0.0803  0.0807 | -0.9921 -0.9849 |  0.9597  0.8864
optical_flow |  0.0001  0.0001 |  0.0189  0.0191 | -0.9168 -1.0694 |  0.9143  0.9684
v_err    | -0.0111 |  0.0599 | -0.4527 |  0.1162
landing_rewards |    9.68 |    1.77 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (18063,) (35286,) 0.5119027376296548
ADV1:  0.000712711062305171 0.0005862227283253711 0.007582737618366087 0.0636969630143277 -0.13729539725805895
ADVB:  (21788,) (35286,) 0.6174686844640934
ADV2:  0.20309775378916814 0.4231103360579075 0.5597007673121123 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.7162   1.8755   7.2696 151.8980 133.1056  53.3563
Update Cnt = 3160    ET =   1379.5   Stats:  Mean, Std, Min, Max
r_f      |    8.77   -1.40    7.99 |  177.73  168.13  199.74 | -367.78 -352.91 -380.31 |  375.19  377.08  387.37
v_f      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.10    0.09    0.10
r_i      |   56.32   12.12   14.72 |  671.88  651.36  766.40 |-1316.33-1254.07-1263.69 | 1254.10 1232.02 1316.77
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.10    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.36
norm_vf  |    0.08 |    0.01 |    0.05 |    0.13
gs_f     |

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0009   0.0032   8.4490   2.3854   3.1146
ADVA:  (20170,) (34858,) 0.5786333122955993
ADV1:  0.0002779678179667851 -0.00022485061088549067 0.013737563223316488 0.8526460177589629 -0.205834659962154
ADVB:  (19713,) (34858,) 0.5655229789431407
ADV2:  0.08752936013134496 0.2713748735023295 0.39786747355507485 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2373   0.5260   2.2226 151.8980 133.1056  53.3563
***** Episode 98261, Mean R = -16.1  Std R = 5.3  Min R = -30.2
PolicyLoss: 1.43
Policy_Entropy: 0.23
Policy_KL: 0.008
Policy_SD: 0.535
Steps: 1.15e+04
TotalSteps: 3.64e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00428


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0021   8.4490   2.3854   3.1146
ADVA:  (21545,) (34725,) 0.6204463642908568
ADV1:  0.00025762738083432747

***** Episode 98478, Mean R = -17.2  Std R = 6.4  Min R = -36.1
PolicyLoss: 1.67
Policy_Entropy: 0.23
Policy_KL: 0.011
Policy_SD: 0.534
Steps: 1.14e+04
TotalSteps: 3.65e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.98
VF_0_Loss : 0.00365


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0010   0.0045   8.4490   2.3854   3.1146
ADVA:  (19986,) (34923,) 0.5722876041577184
ADV1:  0.00046485153513264703 -0.00036550987752096195 0.009312129974266073 0.08299757893527476 -0.08682130283838885
ADVB:  (19415,) (34923,) 0.5559373478796209
ADV2:  0.10742135962489285 0.33047195190752654 0.45289252275146547 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8969   0.2795   1.5599 151.8980 133.1056  53.3563
***** Episode 98509, Mean R = -15.4  Std R = 5.6  Min R = -31.4
PolicyLoss: 1.77
Policy_Entropy: 0.23
Policy_KL: 0.00916
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 3.65e+

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7982   0.6193   3.1052 151.8980 133.1056  53.3563
***** Episode 98726, Mean R = -17.3  Std R = 7.6  Min R = -41.3
PolicyLoss: 1.64
Policy_Entropy: 0.23
Policy_KL: 0.0144
Policy_SD: 0.533
Steps: 1.2e+04
TotalSteps: 3.66e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00382


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0031   8.4490   2.3854   3.1146
ADVA:  (20821,) (35403,) 0.5881140016382792
ADV1:  0.0004154326575512967 -0.0001727470938687384 0.008208597936932833 0.05162458867938785 -0.08505148234959947
ADVB:  (19948,) (35403,) 0.5634550744287208
ADV2:  0.09465713685454552 0.31309060969009317 0.4532618921216381 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9632   0.3765   1.9861 151.8980 133.1056  53.3563
***** Episode 98757, Mean R = -17.3  Std R = 7.6  Min R = -39.4
PolicyLoss: 1.65
Policy_Entropy: 0.23
Policy_KL: 0.0156
Policy_SD: 0.533
Steps: 1.17e+04
TotalSteps:

ADVA:  (21417,) (35092,) 0.6103100421748546
ADV1:  0.00061864572909116 -0.00011435787426344856 0.00805167011792116 0.05581748018244237 -0.05731163491901842
ADVB:  (19823,) (35092,) 0.5648865838367719
ADV2:  0.10613103847022054 0.3086407547065351 0.4395507718455915 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9809   0.3921   1.9445 151.8980 133.1056  53.3563
***** Episode 98974, Mean R = -16.5  Std R = 6.2  Min R = -37.4
PolicyLoss: 1.63
Policy_Entropy: 0.231
Policy_KL: 0.00956
Policy_SD: 0.535
Steps: 1.16e+04
TotalSteps: 3.67e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00486


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0018   0.0011   0.0039   8.4490   2.3854   3.1146
ADVA:  (20087,) (34996,) 0.5739798834152474
ADV1:  0.0008925060205219933 0.00041556905244449733 0.007059818655069333 0.049502063672611285 -0.05703100922642279
ADVB:  (21001,) (34996,) 0.6000971539604526
ADV2:  0.18151622028014264 0.36979679152675593 0.484500872697673

a_f      |    0.02    0.02 |    0.66    1.87 |   -1.54   -3.12 |    1.42    3.14
w_f      |    0.01    0.00   -0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.02 |    0.03    0.02    0.02
w_rewards |   -0.27 |    0.16 |   -0.97 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.52
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.98   -0.99 |    0.98    0.96
cs_angles |  0.0028  0.0047 |  0.0842  0.0790 | -0.9796 -0.9920 |  0.9808  0.9560
optical_flow |  0.0000  0.0000 |  0.0196  0.0180 | -1.1585 -1.0312 |  1.0333  1.1192
v_err    | -0.0110 |  0.0606 | -0.4524 |  0.1030
landing_rewards |    9.55 |    2.08 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -21.31 |    5.05 |  -42.44 |  -12.57
steps    |     377 |      20 |     338 |     418
***** Episode 99253, Mean R = -17.1  Std R = 5.0  Min R = -32.4
PolicyLoss: 1.7
Policy

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8361   0.3302   1.4458 151.8980 133.1056  53.3563
Update Cnt = 3210    ET =   1301.7   Stats:  Mean, Std, Min, Max
r_f      |  -24.29   29.34  -31.66 |  176.64  175.27  198.48 | -391.50 -387.51 -391.95 |  360.88  381.26  396.33
v_f      |    0.01   -0.01    0.01 |    0.04    0.04    0.05 |   -0.10   -0.13   -0.11 |    0.09    0.10    0.10
r_i      |  -80.57  110.11 -129.30 |  658.23  657.26  759.37 |-1293.41-1277.47-1324.61 | 1275.85 1302.41 1293.62
v_i      |    0.01   -0.01    0.01 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.09    0.10    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.38
norm_vf  |    0.08 |    0.01 |    0.04 |    0.14
gs_f     |    1.36 |    1.87 |    0.01 |   15.53
thrust   |    0.01   -0.00    0.00 |    0.66    0.67    0.66 |   -3.16   -3.36   -3.39 |    3.42    3.21    3.45
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.51 |    0.17 |    1.08 |    2.14
rewards  |  -15.82 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1802   0.5014   2.2630 151.8980 133.1056  53.3563
***** Episode 99811, Mean R = -16.3  Std R = 5.6  Min R = -34.2
PolicyLoss: 1.82
Policy_Entropy: 0.231
Policy_KL: 0.00849
Policy_SD: 0.538
Steps: 1.18e+04
TotalSteps: 3.7e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00391


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0048   0.0027   0.0105   8.4490   2.3854   3.1146
ADVA:  (18728,) (35161,) 0.5326355905690965
ADV1:  0.0 -0.0007359730474428874 0.007490254807831145 0.04291080836052941 -0.05204954681246943
ADVB:  (17999,) (35161,) 0.5119023918546116
ADV2:  0.023801927915804374 0.3701930636901191 0.5488159196055649 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4081   0.5955   2.5758 151.8980 133.1056  53.3563
***** Episode 99842, Mean R = -17.3  Std R = 5.9  Min R = -32.5
PolicyLoss: 2.15
Policy

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7904   0.9393   3.6775 151.8980 133.1056  53.3563
***** Episode 100059, Mean R = -16.5  Std R = 5.9  Min R = -31.3
PolicyLoss: 1.8
Policy_Entropy: 0.23
Policy_KL: 0.00894
Policy_SD: 0.537
Steps: 1.16e+04
TotalSteps: 3.71e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.0048


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0041   8.4490   2.3854   3.1146
ADVA:  (20692,) (35113,) 0.5892974112152194
ADV1:  0.0016848180946672908 0.0009991558543455909 0.007602224966805052 0.04965627224394875 -0.10849462874976307
ADVB:  (23302,) (35113,) 0.6636288554096773
ADV2:  0.28197860568193667 0.4269685521847883 0.5017560501819193 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5037   0.6174   2.6501 151.8980 133.1056  53.3563
***** Episode 100090, Mean R = -16.4  Std R = 6.5  Min R = -31.0
PolicyLo

ADVA:  (20987,) (35197,) 0.596272409580362
ADV1:  0.0 -0.0007157499334707067 0.007802192665740934 0.08111589707731803 -0.06600150976883312
ADVB:  (17546,) (35197,) 0.4985083956018979
ADV2:  0.0 0.2752062376442455 0.444399515965257 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.3125   0.9742   4.0787 151.8980 133.1056  53.3563
***** Episode 100307, Mean R = -16.6  Std R = 6.8  Min R = -36.0
PolicyLoss: 1.63
Policy_Entropy: 0.23
Policy_KL: 0.0099
Policy_SD: 0.54
Steps: 1.17e+04
TotalSteps: 3.72e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00381


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0029   8.4490   2.3854   3.1146
ADVA:  (19336,) (35003,) 0.552409793446276
ADV1:  0.0 -0.0006615714058875472 0.00775815138370195 0.08111589707731803 -0.0618664297361539
ADVB:  (18663,) (35003,) 0.5331828700397109
ADV2:  0.05894687240393491 0.30658146328861713 0.4619950393372654 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0003   0.0018   8.4490   2.3854   3.1146
ADVA:  (22155,) (35162,) 0.6300836129912974
ADV1:  0.0008516920327610346 -0.0005631138007809707 0.010145503883956474 0.12658571657578083 -0.18732561359457933
ADVB:  (21449,) (35162,) 0.6100051191627325
ADV2:  0.16246437177113746 0.3037436366904426 0.3954134269958185 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7131   0.2430   1.2929 151.8980 133.1056  53.3563
***** Episode 100555, Mean R = -16.9  Std R = 6.0  Min R = -34.8
PolicyLoss: 1.48
Policy_Entropy: 0.231
Policy_KL: 0.00692
Policy_SD: 0.538
Steps: 1.16e+04
TotalSteps: 3.73e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00446


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0002   0.0001   0.0003   8.4490   2.3854   3.1146
ADVA:  (22507,) (35305,) 0.6375017702874947
ADV1:  0.0012930175785850432 6.721798895861074e-05 0.009421120076554592 0.05890713941308029 -0.18732561359457933
ADVB

cs_angles |  0.0057  0.0022 |  0.0800  0.0772 | -0.9551 -0.9946 |  0.9364  0.9970
optical_flow |  0.0000  0.0001 |  0.0195  0.0191 | -1.1827 -1.0601 |  1.1233  1.1700
v_err    | -0.0113 |  0.0610 | -0.4621 |  0.2087
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.05 |    0.02
tracking_rewards |  -20.92 |    4.83 |  -49.11 |  -12.11
steps    |     378 |      21 |     334 |     420
***** Episode 100803, Mean R = -16.8  Std R = 5.6  Min R = -28.8
PolicyLoss: 2.03
Policy_Entropy: 0.23
Policy_KL: 0.0142
Policy_SD: 0.54
Steps: 1.15e+04
TotalSteps: 3.74e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00349


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0042   8.4490   2.3854   3.1146
ADVA:  (22209,) (35066,) 0.633348542747961
ADV1:  0.00038751577915424086 -0.0005042494556600037 0.008454956107172371 0.05904454095206302 -0.06212480300323342
ADVB:  (18514,) (35066,) 0.527975817030742
ADV2:  0.043

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |   -0.02   -0.12 |    0.66    1.87 |   -1.42   -3.12 |    1.47    3.11
w_f      |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.02   -0.02   -0.03 |    0.04    0.02    0.01
w_rewards |   -0.23 |    0.15 |   -0.74 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.68
seeker_angles |    0.00    0.01 |    0.08    0.08 |   -0.99   -0.96 |    0.99    0.99
cs_angles |  0.0019  0.0054 |  0.0809  0.0767 | -0.9937 -0.9564 |  0.9878  0.9875
optical_flow | -0.0000  0.0001 |  0.0183  0.0185 | -1.1918 -0.9799 |  1.0332  0.9198
v_err    | -0.0114 |  0.0612 | -0.4554 |  0.1459
landing_rewards |    9.52 |    2.15 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -20.86 |    4.74 |  -45.73 |  -13.53
steps    |     378 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.7087   0.7194   3.2705 151.8980 133.1056  53.3563
Update Cnt = 3270    ET =   1402.4   Stats:  Mean, Std, Min, Max
r_f      |   -2.44  -13.54   10.69 |  184.17  170.98  194.92 | -395.42 -391.03 -384.43 |  372.99  369.01  372.82
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.12   -0.10 |    0.11    0.09    0.10
r_i      |  -20.19  -41.77   29.98 |  678.17  663.45  756.09 |-1343.11-1304.18-1253.14 | 1343.62 1327.27 1290.66
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.10 |    0.09    0.09    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.40
norm_vf  |    0.08 |    0.01 |    0.03 |    0.13
gs_f     |    1.44 |    2.30 |    0.01 |   18.41
thrust   |    0.00    0.00    0.00 |    0.67    0.67    0.66 |   -3.28   -3.43   -3.37 |    3.25    3.41    3.44
norm_thrust |    0.90 |    0.71 |    0.00 |    3.46
fuel     |    1.53 |    0.17 |    1.11 |    2.03
rewards  |  -15.84 

 *** BROKE ***   0 1.6247502565383911
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  54.5611   0.0000  54.5611 192.6540 133.1056  77.3677
***** Episode 101671, Mean R = -16.9  Std R = 9.5  Min R = -55.0
PolicyLoss: 2.17
Policy_Entropy: 0.224
Policy_KL: 1.62
Policy_SD: 0.544
Steps: 1.17e+04
TotalSteps: 3.77e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00447


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0021   0.0013   0.0048   8.4490   2.3854   3.1146
ADVA:  (21525,) (35078,) 0.6136324761959062
ADV1:  0.0006772775946319726 -0.0004466199570872768 0.009550017889202555 0.09219723977219285 -0.12748705914974268
ADVB:  (20185,) (35078,) 0.5754318946348138
ADV2:  0.11361620105359313 0.31086737853781676 0.4401480604316713 3.0 0.0
 *** BROKE ***   1 0.5004041790962219
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  40.4313  14.8700  55.3013 192.6540 133.1056  77

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0024   8.4490   2.3854   3.1146
ADVA:  (18038,) (35293,) 0.5110928512736237
ADV1:  1.2271646373650247e-05 -0.00037353877521223753 0.010098147465217533 0.3181416724100993 -0.25861013772024727
ADVB:  (20401,) (35293,) 0.5780466381435412
ADV2:  0.11749978884205736 0.316611308347477 0.45956228977063907 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.4668   3.0813  14.6984 192.6540 133.1056  77.3677
***** Episode 101919, Mean R = -16.4  Std R = 4.4  Min R = -24.5
PolicyLoss: 1.64
Policy_Entropy: 0.23
Policy_KL: 0.0399
Policy_SD: 0.532
Steps: 1.17e+04
TotalSteps: 3.78e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00439


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0028   8.4490   2.3854   3.1146
ADVA:  (19704,) (35347,) 0.5574447619317057
ADV1:  0.0007451847358584

***** Episode 102136, Mean R = -16.2  Std R = 6.5  Min R = -29.4
PolicyLoss: 1.51
Policy_Entropy: 0.231
Policy_KL: 0.00771
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 3.79e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00407


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0011   0.0044   8.4490   2.3854   3.1146
ADVA:  (20622,) (35416,) 0.5822791958436865
ADV1:  0.0007430956697821664 0.0003572864854834517 0.00724620471631809 0.061071000554848986 -0.08946503417930274
ADVB:  (21100,) (35416,) 0.5957759204879151
ADV2:  0.1610364946414387 0.3580768508407486 0.48628769888512946 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1129   0.5212   2.5119 192.6540 133.1056  77.3677
***** Episode 102167, Mean R = -14.8  Std R = 4.4  Min R = -27.4
PolicyLoss: 1.78
Policy_Entropy: 0.231
Policy_KL: 0.00691
Policy_SD: 0.531
Steps: 1.18e+04
TotalSteps: 3.79e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00421


ValFun  Gra

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8198   0.3564   1.6704 192.6540 133.1056  77.3677
***** Episode 102384, Mean R = -16.5  Std R = 5.6  Min R = -33.4
PolicyLoss: 1.41
Policy_Entropy: 0.231
Policy_KL: 0.00672
Policy_SD: 0.535
Steps: 1.16e+04
TotalSteps: 3.8e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00431


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0008   0.0005   0.0018   8.4490   2.3854   3.1146
ADVA:  (22484,) (35048,) 0.6415202008673819
ADV1:  0.000665512812665095 -0.000310909559785734 0.011657898369758982 0.3799047700221959 -0.2574663919551309
ADVB:  (19694,) (35048,) 0.5619150878794795
ADV2:  0.08674059755971159 0.2642477556341447 0.3890079261588684 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5627   0.2152   1.1004 192.6540 133.1056  77.3677
***** Episode 102415, Mean R = -15.4  Std R = 5.8  Min R = -31.9
PolicyLoss: 1.4
Policy_Entropy: 0.231
Policy_KL: 0.00822
Policy_SD: 0.539
Steps: 1.16e+04
TotalSteps:

attitude |   -0.01    0.06    0.02 |    1.25    0.67    1.86 |   -3.14   -1.55   -3.14 |    3.14    1.55    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.06   -0.05 |    0.06    0.05    0.07
a_f      |    0.06    0.00 |    0.66    1.86 |   -1.45   -3.13 |    1.51    3.13
w_f      |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.06   -0.02   -0.02 |    0.04    0.02    0.02
w_rewards |   -0.24 |    0.19 |   -1.67 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |   -0.16 |    2.84 |  -50.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.51
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.92   -0.98 |    1.00    1.00
cs_angles |  0.0007  0.0016 |  0.0800  0.0809 | -0.9209 -0.9833 |  0.9974  0.9995
optical_flow |  0.0001  0.0001 |  0.0207  0.0199 | -1.0125 -1.1885 |  1.0237  1.0021
v_err    | -0.0118 |  0.0618 | -0.9117 |  0.1658
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |    2

ADVA:  (20515,) (35097,) 0.5845228936946177
ADV1:  0.0013595269710223908 0.0006022261400731498 0.010863239082547233 0.10116605088867689 -0.1834120990397035
ADVB:  (21122,) (35097,) 0.6018178191868251
ADV2:  0.18455284837890662 0.38121330593723407 0.5024324363245807 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.9498   0.7463   3.7619 192.6540 133.1056  77.3677
Update Cnt = 3320    ET =   1409.4   Stats:  Mean, Std, Min, Max
r_f      |  -10.31    3.29   12.68 |  190.73  175.55  196.88 | -398.27 -387.47 -385.41 |  382.43  357.41  390.55
v_f      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.09 |    0.11    0.09    0.10
r_i      |  -16.16   11.72   33.67 |  686.59  671.33  759.41 |-1306.71-1311.84-1271.05 | 1349.80 1300.04 1280.45
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.10    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.43
norm_vf  |    0.08 |    0.01 |    0.05 |    0.12
gs_f    

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0047   0.0031   0.0116   8.4490   2.3854   3.1146
ADVA:  (18393,) (34899,) 0.527035158600533
ADV1:  0.0012452857867740804 0.0006323439997752098 0.008972669028269785 0.08821295430314124 -0.1402898122866948
ADVB:  (21691,) (34899,) 0.6215364337087023
ADV2:  0.23608018840522058 0.4186198315652306 0.5217365501425509 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1106   0.3783   1.7607 192.6540 133.1056  77.3677
***** Episode 103221, Mean R = -16.3  Std R = 5.6  Min R = -29.3
PolicyLoss: 2
Policy_Entropy: 0.231
Policy_KL: 0.0101
Policy_SD: 0.542
Steps: 1.16e+04
TotalSteps: 3.83e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00539


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0031   0.0016   0.0072   8.4490   2.3854   3.1146
ADVA:  (18921,) (35012,) 0.5404147149548726
ADV1:  0.0 -0.0005064087669267091 0.008971319771784109 0.08821295430314124 -0.1402898122866948
ADVB:  (17685,) (35012,) 0.50

***** Episode 103438, Mean R = -16.5  Std R = 5.8  Min R = -28.8
PolicyLoss: 1.69
Policy_Entropy: 0.232
Policy_KL: 0.008
Policy_SD: 0.537
Steps: 1.17e+04
TotalSteps: 3.84e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00547


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0011   0.0041   8.4490   2.3854   3.1146
ADVA:  (18523,) (35080,) 0.5280216647662486
ADV1:  0.0012701598136395115 0.0007829960420383306 0.007792927472356136 0.06421864108919512 -0.09732165164126039
ADVB:  (23563,) (35080,) 0.6716932725199544
ADV2:  0.30123277396785153 0.4503140984918716 0.5375269655180737 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.5787   0.1897   1.0199 192.6540 133.1056  77.3677
***** Episode 103469, Mean R = -15.1  Std R = 5.6  Min R = -27.9
PolicyLoss: 1.99
Policy_Entropy: 0.232
Policy_KL: 0.00774
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 3.84e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00474


ValFun  Grad

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   6.8368   2.7234  14.1693 192.6540 133.1056  77.3677
***** Episode 103686, Mean R = -16.7  Std R = 8.1  Min R = -45.8
PolicyLoss: 1.8
Policy_Entropy: 0.231
Policy_KL: 0.0113
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 3.85e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00364


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0031   8.4490   2.3854   3.1146
ADVA:  (20593,) (35106,) 0.586594884065402
ADV1:  0.0011617127064145194 -1.9270644376331882e-05 0.008968557888372725 0.055904784514803796 -0.1436840650565966
ADVB:  (22100,) (35106,) 0.6295220190280864
ADV2:  0.2136880011505442 0.354064256487255 0.4309448507602924 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6400   0.5887   2.8303 192.6540 133.1056  77.3677
***** Episode 103717, Mean R = -16.8  Std R = 6.5  Min R = -34.0
PolicyLoss: 1.67
Policy_Entropy: 0.231
Policy_KL: 0.00903
Policy_SD: 0.534
Steps: 1.18e+04
TotalStep

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7405   0.2020   1.1513 192.6540 133.1056  77.3677
***** Episode 103934, Mean R = -17.4  Std R = 5.7  Min R = -29.3
PolicyLoss: 1.56
Policy_Entropy: 0.231
Policy_KL: 0.0103
Policy_SD: 0.546
Steps: 1.16e+04
TotalSteps: 3.86e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00556


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0022   8.4490   2.3854   3.1146
ADVA:  (20357,) (34848,) 0.5841655188246098
ADV1:  0.0008925486252591933 2.4717900929979905e-05 0.008502879334682646 0.08836415150449864 -0.07843764634513078
ADVB:  (21502,) (34848,) 0.6170224977043158
ADV2:  0.19357506479867426 0.3489167747793684 0.44239243778230813 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8548   0.2706   1.4279 192.6540 133.1056  77.3677
***** Episode 103965, Mean R = -16.1  Std R = 6.6  Min R = -38.8
PolicyLoss: 1.67
Policy_Entropy: 0.232
Policy_KL: 0.00771
Policy_SD: 0.545
Steps: 1.17e+04
TotalSt

attitude |    0.04   -0.01    0.04 |    1.24    0.67    1.85 |   -3.14   -1.54   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.07   -0.05 |    0.06    0.05    0.07
a_f      |   -0.00    0.02 |    0.67    1.85 |   -1.43   -3.14 |    1.54    3.14
w_f      |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.03    0.02    0.01
w_rewards |   -0.25 |    0.17 |   -1.02 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.78
seeker_angles |    0.00    0.00 |    0.08    0.07 |   -0.99   -0.95 |    1.00    1.00
cs_angles |  0.0035  0.0041 |  0.0826  0.0747 | -0.9935 -0.9530 |  0.9992  0.9968
optical_flow |  0.0000  0.0002 |  0.0193  0.0186 | -0.9476 -0.9935 |  0.8794  1.1310
v_err    | -0.0111 |  0.0612 | -0.4531 |  0.2639
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3930   0.6535   2.7772 192.6540 133.1056  77.3677
Update Cnt = 3370    ET =   1937.7   Stats:  Mean, Std, Min, Max
r_f      |   11.03    6.95    8.65 |  188.81  169.57  206.51 | -380.56 -376.45 -396.42 |  397.20  388.40  383.82
v_f      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.10 |    0.10    0.09    0.08
r_i      |   45.70   14.11   40.76 |  706.89  658.23  761.31 |-1363.32-1355.56-1322.24 | 1323.28 1272.61 1323.39
v_i      |   -0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.10   -0.09 |    0.10    0.09    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.41
norm_vf  |    0.07 |    0.01 |    0.03 |    0.12
gs_f     |    1.18 |    1.46 |    0.00 |    9.49
thrust   |    0.00    0.00    0.00 |    0.66    0.66    0.66 |   -3.44   -3.41   -3.45 |    3.43    3.27    3.38
norm_thrust |    0.89 |    0.72 |    0.00 |    3.46
fuel     |    1.51 |    0.19 |    1.11 |    2.54
rewards  |  -15.66 

ADVA:  (19146,) (35243,) 0.5432568169565587
ADV1:  0.0010202708970530458 0.0003716765299858588 0.008925113940466408 0.1847573171823122 -0.11604780187688657
ADVB:  (22347,) (35243,) 0.634083364072298
ADV2:  0.24307816249656333 0.3957923322375577 0.48808981256829626 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8779   0.3272   1.6454 192.6540 133.1056  77.3677
***** Episode 104771, Mean R = -14.9  Std R = 5.9  Min R = -34.6
PolicyLoss: 1.85
Policy_Entropy: 0.231
Policy_KL: 0.00857
Policy_SD: 0.542
Steps: 1.17e+04
TotalSteps: 3.89e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.0043


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0031   8.4490   2.3854   3.1146
ADVA:  (18239,) (35017,) 0.5208612959419711
ADV1:  0.0017230270441529697 0.0010838346505435334 0.007154324466256328 0.07898694172726034 -0.11604780187688657
ADVB:  (24417,) (35017,) 0.697289887768798
ADV2:  0.3818513038640442 0.49440808884814513 0.5332391426981216 3.

***** Episode 104988, Mean R = -16.1  Std R = 5.4  Min R = -29.8
PolicyLoss: 1.73
Policy_Entropy: 0.23
Policy_KL: 0.0181
Policy_SD: 0.536
Steps: 1.18e+04
TotalSteps: 3.9e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00428


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0005   0.0022   8.4490   2.3854   3.1146
ADVA:  (20317,) (35335,) 0.574982312155087
ADV1:  0.0 -0.0012199466333323871 0.007373241177254544 0.07614819767718795 -0.06461649146526188
ADVB:  (17193,) (35335,) 0.4865713881420688
ADV2:  0.0 0.2501695730814231 0.40145917206779697 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9754   0.3770   1.8504 192.6540 133.1056  77.3677
***** Episode 105019, Mean R = -15.0  Std R = 5.3  Min R = -33.1
PolicyLoss: 1.52
Policy_Entropy: 0.231
Policy_KL: 0.016
Policy_SD: 0.53
Steps: 1.18e+04
TotalSteps: 3.9e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00404


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0949   0.3869   2.1249 192.6540 133.1056  77.3677
***** Episode 105236, Mean R = -16.2  Std R = 4.8  Min R = -30.1
PolicyLoss: 2.19
Policy_Entropy: 0.232
Policy_KL: 0.00946
Policy_SD: 0.536
Steps: 1.19e+04
TotalSteps: 3.91e+07
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.00484


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0010   0.0037   8.4490   2.3854   3.1146
ADVA:  (18296,) (35454,) 0.5160489648558696
ADV1:  0.00032201869425518663 0.00024711331054390925 0.006766617359131613 0.10041742138655596 -0.10004407584114605
ADVB:  (21042,) (35454,) 0.593501438483669
ADV2:  0.18936977748899325 0.4205602624670071 0.553696659935009 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6458   0.6961   3.6037 192.6540 133.1056  77.3677
***** Episode 105267, Mean R = -15.3  Std R = 6.2  Min R = -36.8
PolicyLoss: 2.11
Policy_Entropy: 0.232
Policy_KL: 0.00899
Policy_SD: 0.531
Steps: 1.17e+04
TotalS

ADVA:  (16136,) (35060,) 0.46023958927552766
ADV1:  0.0005274222383312406 0.0006100459570371732 0.006286493178338633 0.0647644084454107 -0.07736645819523202
ADVB:  (22626,) (35060,) 0.6453508271534513
ADV2:  0.31147762142732516 0.5334758497623793 0.6418292741903334 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  25.0736  23.9719  80.6552 192.6540 133.1056  77.3677
***** Episode 105484, Mean R = -15.4  Std R = 5.2  Min R = -31.4
PolicyLoss: 2.46
Policy_Entropy: 0.23
Policy_KL: 0.0226
Policy_SD: 0.545
Steps: 1.17e+04
TotalSteps: 3.92e+07
VF_0_ExplainedVarNew: 0.997
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.00486


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0010   8.4490   2.3854   3.1146
ADVA:  (17900,) (35008,) 0.5113117001828154
ADV1:  0.0 -5.219069621053798e-05 0.005845222742289952 0.0647644084454107 -0.07736645819523202
ADVB:  (19085,) (35008,) 0.5451611060329068
ADV2:  0.09153501314318635 0.3925975403644713 0.5663226901375562 3.0 0.0
Policy  Gra

attitude |   -0.09    0.01    0.06 |    1.25    0.66    1.91 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.06   -0.05 |    0.07    0.05    0.07
a_f      |    0.00   -0.06 |    0.66    1.91 |   -1.39   -3.14 |    1.46    3.11
w_f      |    0.01    0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.03 |    0.04    0.02    0.01
w_rewards |   -0.21 |    0.15 |   -0.73 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.25 |    0.31 |    0.00 |    2.91
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -1.00   -0.98 |    0.98    1.00
cs_angles |  0.0028  0.0036 |  0.0842  0.0815 | -0.9957 -0.9789 |  0.9815  0.9968
optical_flow | -0.0000  0.0000 |  0.0192  0.0193 | -1.0847 -1.0285 |  0.8592  1.1864
v_err    | -0.0111 |  0.0613 | -0.4527 |  0.1221
landing_rewards |    9.77 |    1.49 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0999   0.4731   2.2153 192.6540 133.1056  77.3677
Update Cnt = 3420    ET =   1635.9   Stats:  Mean, Std, Min, Max
r_f      |   -3.96   -4.40    1.14 |  183.44  170.21  202.90 | -390.16 -395.03 -394.33 |  395.61  387.24  397.42
v_f      |   -0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.11   -0.08   -0.09 |    0.08    0.09    0.10
r_i      |   -1.59  -40.82   16.82 |  661.69  651.07  787.36 |-1287.01-1286.41-1372.00 | 1347.74 1269.00 1275.53
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.10   -0.09   -0.09 |    0.10    0.09    0.10
norm_rf  |    0.14 |    0.06 |    0.02 |    0.38
norm_vf  |    0.07 |    0.01 |    0.04 |    0.13
gs_f     |    1.37 |    2.08 |    0.02 |   19.92
thrust   |   -0.01    0.00    0.00 |    0.66    0.68    0.66 |   -3.34   -3.46   -3.45 |    3.43    3.26    3.44
norm_thrust |    0.90 |    0.72 |    0.00 |    3.46
fuel     |    1.52 |    0.16 |    1.12 |    2.20
rewards  |  -15.77 

ADVA:  (19562,) (34977,) 0.5592818137633302
ADV1:  0.00028715991946048555 1.3107766172566397e-05 0.007482170045603364 0.15394749430262267 -0.12497934988681175
ADVB:  (19594,) (34977,) 0.5601967006890242
ADV2:  0.11220873396076073 0.34239953765870706 0.4701045682024478 3.0 0.0


  entropy = np.sum( - p * np.log2(p)) / logp.shape[0]
  entropy = np.sum( - p * np.log2(p)) / logp.shape[0]


 *** BROKE ***   7 1.2850844860076904
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  13.6983  13.5835  41.8735 192.6540 133.1056  77.3677
***** Episode 106321, Mean R = -17.7  Std R = 7.4  Min R = -37.2
PolicyLoss: 1.89
Policy_Entropy: nan
Policy_KL: 1.29
Policy_SD: 0.546
Steps: 1.16e+04
TotalSteps: 3.95e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00513


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0006   0.0023   8.4490   2.3854   3.1146
ADVA:  (19747,) (34922,) 0.5654601683752363
ADV1:  4.0457992855571015e-05 -0.0001155443120430205 0.007435136569653345 0.15394749430262267 -0.12497934988681175
ADVB:  (18463,) (34922,) 0.5286925147471508
ADV2:  0.04875447715732141 0.3115800053268048 0.457698038858939 3.0 0.0
 *** BROKE ***   1 1.3263537883758545
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  42.1763  11.3110  53.4872 192.6540 133.1056  77.3677
***** Episode 106352, Mean R = -15.9  Std R = 5.0  Min R = -29.3
PolicyLoss: 1.87
Policy_En

ADVA:  (21796,) (35206,) 0.6190990172129751
ADV1:  6.133384808456113e-05 -0.00034877184523884923 0.00763717300260924 0.05374140838058994 -0.09913693725720252
ADVB:  (17404,) (35206,) 0.49434755439413736
ADV2:  0.0 0.28289319566908344 0.4421235968394093 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0496   0.3404   1.8682 192.6540 133.1056  77.3677
***** Episode 106569, Mean R = -16.0  Std R = 8.4  Min R = -54.6
PolicyLoss: 1.7
Policy_Entropy: 0.231
Policy_KL: 0.00877
Policy_SD: 0.544
Steps: 1.18e+04
TotalSteps: 3.96e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.991
VF_0_Loss : 0.00397


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0007   0.0028   8.4490   2.3854   3.1146
ADVA:  (20614,) (34978,) 0.5893418720338498
ADV1:  0.00037479586933924364 -0.00015588265196771358 0.008501853939169104 0.06828647892460721 -0.13403202881901555
ADVB:  (19103,) (34978,) 0.5461432900680427
ADV2:  0.07892283135406684 0.31505803193826576 0.4664418021068857 3.0 0.0
P

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0013   0.0047   8.4490   2.3854   3.1146
ADVA:  (20307,) (35323,) 0.574894544630977
ADV1:  0.0008890624124582268 0.00023292149724617612 0.008095284636917952 0.07019544948380824 -0.07825637370049449
ADVB:  (22262,) (35323,) 0.6302409195141976
ADV2:  0.21003747446697843 0.3823754733670039 0.49049159025246064 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9829   0.3073   1.5473 192.6540 133.1056  77.3677
***** Episode 106817, Mean R = -16.2  Std R = 6.0  Min R = -34.8
PolicyLoss: 1.8
Policy_Entropy: 0.232
Policy_KL: 0.00721
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 3.97e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00491


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0014   8.4490   2.3854   3.1146
ADVA:  (20381,) (35367,) 0.5762716656770436
ADV1:  0.0009867432599703404 0.00045018603176205077 0.008012831170570346 0.07424231163573702 -0.09010741424508062
ADVB

***** Episode 107034, Mean R = -15.8  Std R = 6.0  Min R = -32.7
PolicyLoss: 1.69
Policy_Entropy: 0.229
Policy_KL: 0.0275
Policy_SD: 0.532
Steps: 1.17e+04
TotalSteps: 3.97e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00437


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0027   8.4490   2.3854   3.1146
ADVA:  (16641,) (35378,) 0.47037707049578836
ADV1:  0.0 -0.0008497040719608944 0.007143394642261 0.06475049722274617 -0.08758066955353916
ADVB:  (18553,) (35378,) 0.5244219571485104
ADV2:  0.05143962769241012 0.36464283060152114 0.5400743597165255 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   7.9022   3.5894  15.6064 192.6540 133.1056  77.3677
***** Episode 107065, Mean R = -15.4  Std R = 5.0  Min R = -26.6
PolicyLoss: 2.06
Policy_Entropy: 0.231
Policy_KL: 0.0194
Policy_SD: 0.533
Steps: 1.17e+04
TotalSteps: 3.97e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.0043


ValFun  Gradients: u/sd/Max/C M

cs_angles | -0.0013  0.0053 |  0.0802  0.0778 | -0.9910 -0.9630 |  0.9749  0.9982
optical_flow |  0.0001  0.0001 |  0.0193  0.0206 | -1.0538 -1.1007 |  1.0949  1.1227
v_err    | -0.0111 |  0.0613 | -0.4565 |  0.2155
landing_rewards |    9.77 |    1.49 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -20.78 |    4.90 |  -51.01 |  -12.75
steps    |     377 |      21 |     333 |     418
***** Episode 107313, Mean R = -15.8  Std R = 4.9  Min R = -30.6
PolicyLoss: 2.08
Policy_Entropy: 0.232
Policy_KL: 0.00701
Policy_SD: 0.534
Steps: 1.16e+04
TotalSteps: 3.98e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00462


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0026   8.4490   2.3854   3.1146
ADVA:  (19623,) (34976,) 0.561041857273559
ADV1:  0.0005868038136980832 0.00025978658318508976 0.006840496408093135 0.06494527188784965 -0.0896374519584775
ADVB:  (20439,) (34976,) 0.5843721408966148
ADV2:  0.

thrust   |    0.00    0.00    0.00 |    0.64    0.66    0.65 |   -3.01   -3.43   -3.22 |    3.41    3.38    3.32
norm_thrust |    0.87 |    0.72 |    0.00 |    3.46
fuel     |    1.49 |    0.18 |    1.04 |    2.37
rewards  |  -15.18 |    5.19 |  -38.66 |   -6.85
fuel_rewards |   -4.28 |    0.52 |   -6.77 |   -2.98
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    2.89 |   13.27 |    0.00 |  145.33
norm_af  |    1.70 |    0.89 |    0.05 |    3.34
norm_wf  |    0.01 |    0.01 |    0.00 |    0.04
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |   -0.08   -0.02    0.05 |    1.21    0.69    1.78 |   -3.14   -1.54   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.07   -0.05 |    0.07    0.05    0.06
a_f      |   -0.01    0.10 |    0.70

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8907   0.2795   1.5495 192.6540 133.1056  77.3677
***** Episode 107902, Mean R = -15.5  Std R = 7.0  Min R = -39.8
PolicyLoss: 1.81
Policy_Entropy: 0.232
Policy_KL: 0.00636
Policy_SD: 0.534
Steps: 1.2e+04
TotalSteps: 4.01e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00448


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0016   8.4490   2.3854   3.1146
ADVA:  (18529,) (35583,) 0.5207261894725009
ADV1:  0.0010581182604982118 0.0005502647536042208 0.008020246756261326 0.0942120213757236 -0.10423214026638888
ADVB:  (23575,) (35583,) 0.6625354804260462
ADV2:  0.27623837899796877 0.43328830609057706 0.5127430506824975 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0618   0.4208   2.0200 192.6540 133.1056  77.3677
Update Cnt = 3480    ET =   1820.0   Stats:  Mean, Std, Min, Max
r_f      |    0.50  -15.22  -13.48 |  178.44  172.12  200.69 | -386.94 -367.20 -380.33 |  390.29  

ADVA:  (21964,) (35027,) 0.6270591258172267
ADV1:  0.0 -0.0013134085253645662 0.00765339222062235 0.03894246182947275 -0.054545940748275505
ADVB:  (15819,) (35027,) 0.4516230336597482
ADV2:  0.0 0.2052620814204483 0.3571079103039651 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.8034   0.2857   1.3668 192.6540 133.1056  77.3677
***** Episode 108150, Mean R = -14.3  Std R = 3.4  Min R = -22.7
PolicyLoss: 1.34
Policy_Entropy: 0.233
Policy_KL: 0.0105
Policy_SD: 0.539
Steps: 1.18e+04
TotalSteps: 4.02e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00468


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0014   8.4490   2.3854   3.1146
ADVA:  (22571,) (35234,) 0.6406028268150082
ADV1:  0.00046757722742070407 -0.00032775410480477644 0.007846184066989718 0.047508004218140776 -0.12385970972812421
ADVB:  (18718,) (35234,) 0.5312482261452006
ADV2:  0.046165315572499636 0.2563736581602518 0.3870583373702941 3.0 0.0
Policy  Gradients: u

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0015   0.0053   8.4490   2.3854   3.1146
ADVA:  (21417,) (35202,) 0.6084029316516107
ADV1:  0.0003959854805624715 -0.0002785425036882091 0.007703949024408119 0.0414534720176683 -0.07243814392575476
ADVB:  (19276,) (35202,) 0.5475825237202432
ADV2:  0.08062804383505663 0.3168073981721867 0.4384099065897647 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6820   0.2459   1.2211 192.6540 133.1056  77.3677
***** Episode 108398, Mean R = -16.1  Std R = 6.9  Min R = -35.1
PolicyLoss: 1.7
Policy_Entropy: 0.233
Policy_KL: 0.00707
Policy_SD: 0.531
Steps: 1.17e+04
TotalSteps: 4.03e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00418


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0013   0.0048   8.4490   2.3854   3.1146
ADVA:  (22201,) (35234,) 0.6301016064029062
ADV1:  0.0 -0.0009276507001037008 0.007814138024883757 0.05222936238902909 -0.09124989632704417
ADVB:  (16610,) (35234,

***** Episode 108615, Mean R = -13.9  Std R = 4.4  Min R = -24.7
PolicyLoss: 2.23
Policy_Entropy: 0.233
Policy_KL: 0.0158
Policy_SD: 0.532
Steps: 1.17e+04
TotalSteps: 4.03e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00466


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0023   0.0014   0.0049   8.4490   2.3854   3.1146
ADVA:  (19047,) (35274,) 0.5399727844871577
ADV1:  0.0002797861626275735 0.00018234743072505097 0.0064225474034763025 0.04360595718846133 -0.06417157866276763
ADVB:  (20392,) (35274,) 0.57810285195895
ADV2:  0.15964468904717494 0.42998328602510266 0.563221985752103 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.3572   2.1767   9.5774 192.6540 133.1056  77.3677
***** Episode 108646, Mean R = -14.8  Std R = 3.8  Min R = -23.5
PolicyLoss: 2.2
Policy_Entropy: 0.234
Policy_KL: 0.00865
Policy_SD: 0.53
Steps: 1.16e+04
TotalSteps: 4.03e+07
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.996
VF_0_Loss : 0.00508


ValFun  Gradi

optical_flow |  0.0002  0.0001 |  0.0181  0.0200 | -0.9226 -1.0363 |  0.9669  1.0902
v_err    | -0.0110 |  0.0619 | -0.4556 |  0.5554
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.07 |    0.04
tracking_rewards |  -21.21 |    6.16 |  -83.07 |  -13.00
steps    |     378 |      21 |     310 |     419
***** Episode 108863, Mean R = -17.5  Std R = 6.6  Min R = -32.2
PolicyLoss: 1.57
Policy_Entropy: 0.231
Policy_KL: 0.0733
Policy_SD: 0.534
Steps: 1.18e+04
TotalSteps: 4.04e+07
VF_0_ExplainedVarNew: 0.988
VF_0_ExplainedVarOld: 0.982
VF_0_Loss : 0.00494


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0014   8.4490   2.3854   3.1146
ADVA:  (21691,) (35341,) 0.6137630514133726
ADV1:  -0.0003013108088329979 -0.0026091222027899118 0.011476771984480558 0.19393085061390175 -0.2009107046197488
ADVB:  (16284,) (35341,) 0.4607679465776294
ADV2:  0.0 0.2129483818515845 0.3747293609064621 3.0 0.0
Policy  Gradients: u/sd/Max/C Max

thrust   |    0.00    0.00   -0.00 |    0.65    0.67    0.65 |   -3.39   -3.42   -3.36 |    3.38    3.41    3.44
norm_thrust |    0.88 |    0.72 |    0.00 |    3.46
fuel     |    1.51 |    0.18 |    1.16 |    2.30
rewards  |  -15.73 |    5.76 |  -45.64 |   -6.95
fuel_rewards |   -4.33 |    0.53 |   -6.59 |   -3.34
glideslope_rewards |    0.00 |    0.00 |    0.00 |    0.00
glideslope_penalty |    0.00 |    0.00 |    0.00 |    0.00
glideslope |    3.05 |   15.37 |    0.00 | 2604.25
norm_af  |    1.68 |    0.93 |    0.08 |    3.35
norm_wf  |    0.01 |    0.01 |    0.00 |    0.03
rh_penalty |    0.00 |    0.00 |    0.00 |    0.00
att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |   -0.04   -0.02    0.12 |    1.16    0.66    1.80 |   -3.14   -1.57   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.06   -0.05 |    0.06    0.05    0.07
a_f      |   -0.03    0.12 |    0.65

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6714   0.2105   1.1302 192.6540 133.1056  77.3677
***** Episode 109452, Mean R = -16.7  Std R = 7.0  Min R = -34.1
PolicyLoss: 1.75
Policy_Entropy: 0.233
Policy_KL: 0.00511
Policy_SD: 0.542
Steps: 1.17e+04
TotalSteps: 4.07e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00459


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0019   0.0009   0.0039   8.4490   2.3854   3.1146
ADVA:  (21212,) (35042,) 0.6053307459619884
ADV1:  0.0006243903349508022 -5.760416213260773e-05 0.0116330679159569 0.37090777087647553 -0.10142089571082735
ADVB:  (20616,) (35042,) 0.5883225843273786
ADV2:  0.1261677121663379 0.3001622250643358 0.4224751230539129 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1226   0.4903   2.4865 192.6540 133.1056  77.3677
Update Cnt = 3530    ET =   1536.9   Stats:  Mean, Std, Min, Max
r_f      |  -21.41   14.73    5.82 |  190.47  161.10  201.80 | -388.00 -381.12 -376.24 |  376.96  37

ADVA:  (18465,) (35270,) 0.5235327473773745
ADV1:  0.0 -0.00047366314908002527 0.0068621231155202925 0.15057771715501728 -0.08933306941008223
ADVB:  (17047,) (35270,) 0.48332860788205273
ADV2:  0.0 0.3145806254364774 0.5092477913399743 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0117   0.3878   2.1314 192.6540 133.1056  77.3677
***** Episode 109700, Mean R = -15.9  Std R = 6.4  Min R = -38.4
PolicyLoss: 1.91
Policy_Entropy: 0.234
Policy_KL: 0.00903
Policy_SD: 0.527
Steps: 1.18e+04
TotalSteps: 4.07e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00329


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0046   0.0027   0.0100   8.4490   2.3854   3.1146
ADVA:  (16910,) (35213,) 0.48022037315764066
ADV1:  0.00012218742992766742 0.0006246239364891061 0.007518547349434997 0.15057771715501728 -0.08933306941008223
ADVB:  (20294,) (35213,) 0.5763212449947462
ADV2:  0.17501852618160993 0.4815687308864692 0.6406780657658327 3.0 0.0
Policy  Gradients: 

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0028   8.4490   2.3854   3.1146
ADVA:  (19149,) (35097,) 0.5456021882212155
ADV1:  0.0003778829233315271 3.918488438406111e-06 0.00669641681745575 0.05950706496348379 -0.07548039459203065
ADVB:  (20093,) (35097,) 0.5724990739949284
ADV2:  0.14573078092849762 0.3762854491148694 0.5173104995698399 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9792   0.3662   1.7436 192.6540 133.1056  77.3677
***** Episode 109948, Mean R = -15.4  Std R = 5.4  Min R = -29.0
PolicyLoss: 1.94
Policy_Entropy: 0.234
Policy_KL: 0.00815
Policy_SD: 0.534
Steps: 1.17e+04
TotalSteps: 4.08e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.988
VF_0_Loss : 0.00452


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0009   0.0005   0.0018   8.4490   2.3854   3.1146
ADVA:  (20112,) (35370,) 0.5686174724342663
ADV1:  0.0003639081957443292 -0.00021876728676061363 0.006950026600046674 0.05950706496348379 -0.07869057878010732
ADVB

***** Episode 110165, Mean R = -14.9  Std R = 5.3  Min R = -32.2
PolicyLoss: 0.838
Policy_Entropy: 0.234
Policy_KL: 0.0095
Policy_SD: 0.527
Steps: 1.18e+04
TotalSteps: 4.09e+07
VF_0_ExplainedVarNew: 0.98
VF_0_ExplainedVarOld: 0.973
VF_0_Loss : 0.0046


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0012   8.4490   2.3854   3.1146
ADVA:  (20437,) (35325,) 0.5785421089879689
ADV1:  0.0007173205453662934 -0.00321352855823265 0.04921873116196628 0.11332603524629775 -1.2961667649141315
ADVB:  (19675,) (35325,) 0.556970983722576
ADV2:  0.0524234468435564 0.18098862890260534 0.299811094001413 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.0209   2.0676   7.5246 192.6540 133.1056  77.3677
***** Episode 110196, Mean R = -17.5  Std R = 6.2  Min R = -35.7
PolicyLoss: 0.959
Policy_Entropy: 0.234
Policy_KL: 0.01
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 4.09e+07
VF_0_ExplainedVarNew: 0.982
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.00462


ValFun  Gradients: 

***** Episode 110413, Mean R = -17.9  Std R = 7.6  Min R = -38.2
PolicyLoss: 1.52
Policy_Entropy: 0.233
Policy_KL: 0.00903
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 4.1e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.00523


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0008   8.4490   2.3854   3.1146
ADVA:  (22686,) (35196,) 0.6445618820320491
ADV1:  0.0009460689943329263 -0.000488578914955221 0.009501401424501187 0.1844816736248227 -0.0819962913824831
ADVB:  (20752,) (35196,) 0.5896124559609046
ADV2:  0.12664137801866665 0.2898787886874227 0.40214596660313967 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.6632   0.5305   2.7140 192.6540 133.1056  77.3677
***** Episode 110444, Mean R = -14.5  Std R = 5.5  Min R = -30.0
PolicyLoss: 1.45
Policy_Entropy: 0.233
Policy_KL: 0.0106
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 4.1e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00518


ValFun  Gradien

attitude |   -0.07   -0.02    0.03 |    1.25    0.68    1.82 |   -3.14   -1.55   -3.14 |    3.14    1.57    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.05   -0.08   -0.05 |    0.07    0.05    0.06
a_f      |   -0.02    0.00 |    0.69    1.83 |   -1.53   -3.13 |    1.46    3.13
w_f      |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.02   -0.02 |    0.03    0.02    0.02
w_rewards |   -0.17 |    0.16 |   -0.90 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.29 |    0.00 |    2.98
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.97   -0.99 |    0.99    1.00
cs_angles |  0.0020  0.0040 |  0.0775  0.0776 | -0.9726 -0.9897 |  0.9863  0.9984
optical_flow |  0.0001  0.0000 |  0.0182  0.0200 | -0.8450 -1.1405 |  0.9375  1.0834
v_err    | -0.0112 |  0.0622 | -0.4534 |  0.3618
landing_rewards |    9.45 |    2.28 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3726   0.5597   3.0909 192.6540 133.1056  77.3677
Update Cnt = 3580    ET =   1508.9   Stats:  Mean, Std, Min, Max
r_f      |   -0.95   -9.65   -5.21 |  175.90  175.66  207.39 | -378.65 -375.13 -382.25 |  390.80  378.51  395.08
v_f      |    0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.13   -0.08   -0.11 |    0.12    0.09    0.10
r_i      |  -16.20  -24.13  -40.83 |  670.33  655.58  788.92 |-1316.72-1281.49-1336.08 | 1346.49 1364.12 1245.27
v_i      |   -0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.09 |    0.09    0.09    0.09
norm_rf  |    0.14 |    0.06 |    0.03 |    0.39
norm_vf  |    0.08 |    0.01 |    0.04 |    0.14
gs_f     |    1.57 |    3.54 |    0.00 |   52.15
thrust   |    0.00    0.00    0.00 |    0.66    0.67    0.66 |   -3.34   -3.44   -3.44 |    3.35    3.20    3.44
norm_thrust |    0.90 |    0.71 |    0.00 |    3.46
fuel     |    1.52 |    0.18 |    1.13 |    2.11
rewards  |  -16.23 

ADVA:  (20988,) (34810,) 0.6029301924734272
ADV1:  0.0 -0.0005882147948703091 0.008202904991113305 0.06260641526923738 -0.1311821394150562
ADVB:  (16661,) (34810,) 0.4786268313702959
ADV2:  0.0 0.27682845604003203 0.4860963786404828 3.0 0.0
 *** BROKE ***   0 0.6547433137893677
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  82.8389   0.0000  82.8389 192.6540 133.1056  77.3677
***** Episode 111281, Mean R = -16.2  Std R = 7.1  Min R = -42.0
PolicyLoss: 1.89
Policy_Entropy: 0.23
Policy_KL: 0.655
Policy_SD: 0.547
Steps: 1.16e+04
TotalSteps: 4.13e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00464


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0014   8.4490   2.3854   3.1146
ADVA:  (21509,) (35082,) 0.6131064363491249
ADV1:  0.0 -0.00022918213394277238 0.007068285558390833 0.06260641526923738 -0.1311821394150562
ADVB:  (16280,) (35082,) 0.4640556410694943
ADV2:  0.0 0.276132784540796 0.4988669295104289 3.0 0.0
 *** BROKE ***   1 1.03

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0011   0.0007   0.0025   8.4490   2.3854   3.1146
ADVA:  (16663,) (35007,) 0.4759905161824778
ADV1:  4.755711376238855e-05 0.00018132766120081324 0.005601797357942407 0.060570538335364854 -0.06059277256550982
ADVB:  (20481,) (35007,) 0.585054417687891
ADV2:  0.16638064740762837 0.4312305787386853 0.5823380305290832 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.6464   3.0574  12.1355 192.6540 133.1056  77.3677
***** Episode 111529, Mean R = -14.4  Std R = 4.4  Min R = -25.1
PolicyLoss: 2.16
Policy_Entropy: 0.234
Policy_KL: 0.0085
Policy_SD: 0.532
Steps: 1.17e+04
TotalSteps: 4.14e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00435


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0015   8.4490   2.3854   3.1146
ADVA:  (18720,) (35000,) 0.5348571428571428
ADV1:  7.934106247010214e-05 5.872918823762923e-06 0.006300469844825332 0.11993123398915473 -0.06059277256550982
ADVB:

***** Episode 111746, Mean R = -16.0  Std R = 5.7  Min R = -31.0
PolicyLoss: 1.93
Policy_Entropy: 0.234
Policy_KL: 0.00977
Policy_SD: 0.537
Steps: 1.16e+04
TotalSteps: 4.15e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00518


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0007   0.0004   0.0015   8.4490   2.3854   3.1146
ADVA:  (20068,) (35221,) 0.5697737145452997
ADV1:  0.00035308038784637435 -0.00011949479499716795 0.007839981204181955 0.12264464008273346 -0.06057666079002033
ADVB:  (20018,) (35221,) 0.568354106924846
ADV2:  0.11454023407441846 0.3278788002115446 0.47221192197747364 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1961   0.4072   1.9223 192.6540 133.1056  77.3677
***** Episode 111777, Mean R = -17.7  Std R = 7.1  Min R = -34.5
PolicyLoss: 1.69
Policy_Entropy: 0.234
Policy_KL: 0.0105
Policy_SD: 0.541
Steps: 1.18e+04
TotalSteps: 4.15e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00517


ValFun  Gr

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.6197   0.8992   4.7806 192.6540 133.1056  77.3677
***** Episode 111994, Mean R = -16.1  Std R = 6.9  Min R = -36.1
PolicyLoss: 1.55
Policy_Entropy: 0.233
Policy_KL: 0.0204
Policy_SD: 0.545
Steps: 1.18e+04
TotalSteps: 4.16e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00521


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0029   0.0018   0.0065   8.4490   2.3854   3.1146
ADVA:  (19741,) (35024,) 0.5636420740063957
ADV1:  0.0008926993927922643 0.0001829738753545432 0.008147882309704957 0.05678247365358896 -0.10051432579005487
ADVB:  (20722,) (35024,) 0.591651439013248
ADV2:  0.16916557392359766 0.36319230990955287 0.47007980604066946 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1737   0.4119   1.9923 192.6540 133.1056  77.3677
***** Episode 112025, Mean R = -14.9  Std R = 3.9  Min R = -23.8
Poli

attitude |   -0.07   -0.03    0.17 |    1.29    0.67    1.90 |   -3.14   -1.55   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.06   -0.05 |    0.08    0.05    0.07
a_f      |   -0.04    0.14 |    0.67    1.92 |   -1.44   -3.13 |    1.54    3.14
w_f      |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.03   -0.03 |    0.04    0.02    0.02
w_rewards |   -0.16 |    0.15 |   -1.23 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.76
seeker_angles |    0.01    0.00 |    0.08    0.08 |   -0.98   -0.99 |    0.94    0.93
cs_angles |  0.0052  0.0038 |  0.0834  0.0794 | -0.9769 -0.9949 |  0.9372  0.9334
optical_flow | -0.0000  0.0001 |  0.0187  0.0206 | -1.0358 -1.1854 |  1.0864  1.0581
v_err    | -0.0112 |  0.0616 | -0.4534 |  0.1910
landing_rewards |    9.48 |    2.21 |    0.00 |   10.00
landing_margin |   -0

ADVA:  (20736,) (35259,) 0.5881051646388156
ADV1:  0.00013594317423975925 -0.0004350731479199933 0.008758971126903818 0.06182935070653223 -0.06599491621565623
ADVB:  (19833,) (35259,) 0.5624946822087977
ADV2:  0.13507581168114496 0.37960867403181753 0.49648728436980283 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7592   0.2965   1.4169 192.6540 133.1056  77.3677
Update Cnt = 3630    ET =   1407.3   Stats:  Mean, Std, Min, Max
r_f      |   -2.46   13.11    0.67 |  188.57  170.63  199.65 | -394.46 -394.82 -389.53 |  398.35  365.24  387.25
v_f      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.11 |    0.10    0.11    0.10
r_i      |  -61.40   36.02    0.31 |  687.55  669.15  754.72 |-1289.65-1321.27-1319.32 | 1287.69 1366.21 1299.00
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.10    0.09
norm_rf  |    0.14 |    0.07 |    0.01 |    0.41
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs_f

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0013   8.4490   2.3854   3.1146
ADVA:  (22144,) (34912,) 0.6342804766269478
ADV1:  0.0007585735579760076 -0.00015731233158666102 0.007968034395811813 0.05717156462419348 -0.07149844795608573
ADVB:  (20364,) (34912,) 0.583295142071494
ADV2:  0.12470991893560618 0.303155711251333 0.40993436544633666 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6091   0.2086   0.9748 192.6540 133.1056  77.3677
***** Episode 112831, Mean R = -14.7  Std R = 4.1  Min R = -27.1
PolicyLoss: 1.53
Policy_Entropy: 0.233
Policy_KL: 0.00839
Policy_SD: 0.538
Steps: 1.16e+04
TotalSteps: 4.19e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.0046


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0023   8.4490   2.3854   3.1146
ADVA:  (19696,) (34938,) 0.5637414849161372
ADV1:  0.0006283235091619106 -5.9925460980251145e-06 0.006740880604542744 0.03761399732311321 -0.06886472539045663
ADV

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0011   0.0044   8.4490   2.3854   3.1146
ADVA:  (17801,) (35276,) 0.5046207052953849
ADV1:  -0.0004686563312466807 -0.0018032353954755582 0.009806571661456851 0.11525193943657397 -0.18834128195916267
ADVB:  (17134,) (35276,) 0.4857126658351287
ADV2:  0.0 0.3040718796206988 0.4817680561399265 3.0 0.0
 *** BROKE ***   2 1.236889362335205
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  23.4882  15.8003  41.6770 192.6540 133.1056  77.3677
***** Episode 113079, Mean R = -17.9  Std R = 8.4  Min R = -48.2
PolicyLoss: 1.96
Policy_Entropy: 0.227
Policy_KL: 1.24
Policy_SD: 0.537
Steps: 1.18e+04
TotalSteps: 4.2e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00518


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0011   8.4490   2.3854   3.1146
ADVA:  (19769,) (35157,) 0.56230622635606
ADV1:  0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2900   0.4976   2.1146 192.6540 133.1056  77.3677
***** Episode 113296, Mean R = -14.6  Std R = 5.5  Min R = -32.0
PolicyLoss: 1.58
Policy_Entropy: 0.234
Policy_KL: 0.00626
Policy_SD: 0.53
Steps: 1.18e+04
TotalSteps: 4.21e+07
VF_0_ExplainedVarNew: 0.992
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00567


Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0008   0.0028   8.4490   2.3854   3.1146
ADVA:  (20076,) (35188,) 0.570535409798795
ADV1:  0.000425193643817089 -0.00018768105485023905 0.007856521176983837 0.12841088591992061 -0.09961734059946337
ADVB:  (20484,) (35188,) 0.5821302716835285
ADV2:  0.13845129162377817 0.3318486361690732 0.45573634679392494 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.7627   0.2870   1.3412 192.6540 133.1056  77.3677
***** Episode 113327, Mean R = -16.2  Std R = 5.4  Min R = -31.8
Poli

ADVA:  (21459,) (35100,) 0.6113675213675214
ADV1:  0.00013250526076595548 -0.00032963691867056357 0.008413023863540328 0.13294802109460857 -0.11182900838719434
ADVB:  (18267,) (35100,) 0.5204273504273504
ADV2:  0.0344218447011225 0.30097118337560536 0.45910830716989265 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3556   0.3992   2.1234 192.6540 133.1056  77.3677
***** Episode 113544, Mean R = -15.7  Std R = 5.6  Min R = -29.9
PolicyLoss: 1.7
Policy_Entropy: 0.233
Policy_KL: 0.0087
Policy_SD: 0.536
Steps: 1.17e+04
TotalSteps: 4.22e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.984
VF_0_Loss : 0.0045


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0003   0.0002   0.0007   8.4490   2.3854   3.1146
ADVA:  (20733,) (35328,) 0.5868716032608695
ADV1:  1.6586571412399026e-05 -0.0006155917849715194 0.008564074345881268 0.13294802109460857 -0.11182900838719434
ADVB:  (19330,) (35328,) 0.5471580615942029
ADV2:  0.07907735614379213 0.3149444660486652 0.4601710916166

att_rewards |    0.00 |    0.00 |    0.00 |    0.00
att_penalty |    0.00 |    0.00 |    0.00 |    0.00
attitude |    0.01    0.00   -0.01 |    1.26    0.69    1.87 |   -3.14   -1.56   -3.14 |    3.14    1.56    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.07   -0.05 |    0.07    0.05    0.07
a_f      |    0.01   -0.04 |    0.70    1.86 |   -1.50   -3.13 |    1.40    3.12
w_f      |    0.00    0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.03   -0.04 |    0.04    0.03    0.01
w_rewards |   -0.16 |    0.14 |   -0.86 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.30 |    0.00 |    2.57
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.99   -0.98 |    1.00    0.98
cs_angles |  0.0014  0.0024 |  0.0814  0.0779 | -0.9853 -0.9802 |  0.9987  0.9771
optical_flow |  0.0000  0.0002 |  0.0184  0.0198 | -0.9215 -1.0494 |  1.1045  1.1531
v_err    | -0.0117 |  

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0010   8.4490   2.3854   3.1146
ADVA:  (21127,) (35210,) 0.6000284010224368
ADV1:  0.0 -0.0006686054292026087 0.007653878320661568 0.04184096346109867 -0.05997742736085119
ADVB:  (18697,) (35210,) 0.531013916500994
ADV2:  0.049723267401013585 0.2813642390905906 0.4319293498404207 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.2508   2.4142  10.8944 192.6540 133.1056  77.3677
Update Cnt = 3680    ET =   1373.1   Stats:  Mean, Std, Min, Max
r_f      |    1.70   10.30   22.61 |  189.00  171.42  190.95 | -382.09 -371.80 -381.82 |  398.19  369.36  385.62
v_f      |    0.00   -0.00   -0.01 |    0.04    0.04    0.05 |   -0.09   -0.08   -0.10 |    0.09    0.09    0.10
r_i      |   -9.82   25.42   63.65 |  705.94  672.50  723.89 |-1269.11-1335.34-1270.51 | 1323.79 1336.34 1284.22
v_i      |    0.00   -0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.09
norm_rf  |    0.14 |

Dynamics: Max Disturbance (m/s^2):  [0.00130178 0.00138111 0.00142963] 0.0023761225757454076
ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0012   0.0050   8.4490   2.3854   3.1146
ADVA:  (20793,) (35323,) 0.5886532853947852
ADV1:  -0.00045201067117211884 -0.002097023247330407 0.008984679113049737 0.07301803353944886 -0.12530639336606864
ADVB:  (16253,) (35323,) 0.46012513093451857
ADV2:  0.0 0.2071442564334737 0.36666895207323946 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.4041   0.5913   2.7403 192.6540 133.1056  77.3677
***** Episode 114381, Mean R = -16.1  Std R = 5.3  Min R = -28.7
PolicyLoss: 1.32
Policy_Entropy: 0.234
Policy_KL: 0.0116
Policy_SD: 0.526
Steps: 1.18e+04
TotalSteps: 4.25e+07
VF_0_ExplainedVarNew: 0.99
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00406


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0013   8.4490   2.3854   3.1146
ADVA:  (19360,) (35301,) 0.5484263901872468
ADV1:  -1.1839197880517127e-05 -0.00137

***** Episode 114598, Mean R = -14.8  Std R = 5.0  Min R = -28.5
PolicyLoss: 1.81
Policy_Entropy: 0.234
Policy_KL: 0.00773
Policy_SD: 0.539
Steps: 1.17e+04
TotalSteps: 4.26e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.989
VF_0_Loss : 0.00578


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0014   0.0050   8.4490   2.3854   3.1146
ADVA:  (20621,) (35109,) 0.5873422769090546
ADV1:  0.0015288776132900148 0.0004901322559415215 0.00860053152454753 0.08371136216958674 -0.06634551382672656
ADVB:  (23180,) (35109,) 0.6602295707653308
ADV2:  0.2678752734837949 0.40888024936354994 0.4751921788935116 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2902   0.4139   2.2094 192.6540 133.1056  77.3677
***** Episode 114629, Mean R = -14.6  Std R = 4.3  Min R = -25.4
PolicyLoss: 1.83
Policy_Entropy: 0.234
Policy_KL: 0.00977
Policy_SD: 0.529
Steps: 1.18e+04
TotalSteps: 4.26e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00468


Dynamics: M

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  37.8573   4.6423  46.6785 192.6540 133.1056  77.3677
***** Episode 114846, Mean R = -14.3  Std R = 5.0  Min R = -32.2
PolicyLoss: 2.04
Policy_Entropy: 0.233
Policy_KL: 0.0236
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 4.27e+07
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.994
VF_0_Loss : 0.00586


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0088   0.0054   0.0210   8.4490   2.3854   3.1146
ADVA:  (21743,) (34897,) 0.6230621543399146
ADV1:  -0.00038985444366843904 -0.003465514986739294 0.05542240259155803 0.05219807189270215 -2.1914529514312764
ADVB:  (12906,) (34897,) 0.36983121758317333
ADV2:  0.0 0.05000551734754034 0.10560058943853463 1.7951607218244188 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   4.5848   2.5811   9.8503 192.6540 133.1056  77.3677
***** Episode 114877, Mean R = -20.1  Std R = 25.7  Min R = -158.7
PolicyLoss: 0.399
Policy_Entropy: 0.234
Policy_KL: 0.00991
Policy_SD: 0.537
Steps: 1.13e+04
To

ADVA:  (23657,) (34770,) 0.6803853897037676
ADV1:  0.0010568058108928313 -0.0019646821425214826 0.08163377262946127 0.29031065833524305 -3.9520955688770245
ADVB:  (19378,) (34770,) 0.5573195283290193
ADV2:  0.020852119334197346 0.08425987839380981 0.17077691089269514 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6006   0.2310   1.2415 192.6540 133.1056  77.3677
***** Episode 115094, Mean R = -17.0  Std R = 5.9  Min R = -36.0
PolicyLoss: 0.451
Policy_Entropy: 0.234
Policy_KL: 0.00484
Policy_SD: 0.536
Steps: 1.16e+04
TotalSteps: 4.28e+07
VF_0_ExplainedVarNew: 0.974
VF_0_ExplainedVarOld: 0.95
VF_0_Loss : 0.00579


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0020   0.0012   0.0054   8.4490   2.3854   3.1146
ADVA:  (23139,) (34858,) 0.6638074473578519
ADV1:  0.0 -0.0029989973415696673 0.08176866307144533 0.29031065833524305 -3.9520955688770245
ADVB:  (15084,) (34858,) 0.43272706408858796
ADV2:  0.0 0.07773873279734009 0.20139740014156257 3.0 0.0
Policy  Gradients: u/

w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.07   -0.06   -0.05 |    0.07    0.06    0.06
a_f      |    0.02   -0.06 |    0.65    1.89 |   -1.43   -3.14 |    1.49    3.14
w_f      |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.03   -0.04 |    0.04    0.03    0.02
w_rewards |   -0.18 |    0.16 |   -1.25 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.31 |    0.00 |    3.02
seeker_angles |    0.00    0.01 |    0.08    0.08 |   -0.98   -0.98 |    1.00    0.95
cs_angles |  0.0040  0.0057 |  0.0800  0.0795 | -0.9784 -0.9829 |  0.9975  0.9528
optical_flow | -0.0000 -0.0000 |  0.0194  0.0197 | -1.0454 -1.0127 |  0.9559  1.0151
v_err    | -0.0116 |  0.0610 | -0.4531 |  0.2159
landing_rewards |    9.71 |    1.68 |    0.00 |   10.00
landing_margin |   -0.03 |    0.01 |   -0.06 |    0.02
tracking_rewards |  -21.05 |    6.27 |  -85.31 |  -13.31
steps    |     379 |  

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1773   0.4436   2.0323 192.6540 133.1056  77.3677
Update Cnt = 3730    ET =   1332.2   Stats:  Mean, Std, Min, Max
r_f      |   -7.68   -5.74    9.26 |  184.57  183.61  193.26 | -391.40 -382.49 -390.78 |  394.89  370.95  384.46
v_f      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.11   -0.10   -0.10 |    0.10    0.11    0.09
r_i      |  -36.74  -21.82   52.82 |  667.66  689.56  757.22 |-1262.85-1346.21-1261.91 | 1366.04 1314.27 1306.83
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.08    0.10    0.10
norm_rf  |    0.14 |    0.07 |    0.01 |    0.52
norm_vf  |    0.08 |    0.01 |    0.05 |    0.11
gs_f     |    1.22 |    1.69 |    0.01 |   18.48
thrust   |   -0.00    0.00    0.00 |    0.66    0.65    0.65 |   -3.38   -3.44   -3.43 |    3.37    3.45    3.43
norm_thrust |    0.87 |    0.72 |    0.00 |    3.46
fuel     |    1.48 |    0.19 |    1.11 |    2.25
rewards  |  -16.15 

ADVA:  (17937,) (35217,) 0.5093278814209047
ADV1:  0.0007097053111727192 0.0008062815310065592 0.008649303496482633 0.33369327339235116 -0.13829869228444802
ADVB:  (21539,) (35217,) 0.6116080302126814
ADV2:  0.17848503844954763 0.3813011853845493 0.5155513155109713 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.1523   0.3962   1.9781 192.6540 133.1056  77.3677
***** Episode 115931, Mean R = -14.0  Std R = 4.8  Min R = -31.9
PolicyLoss: 1.84
Policy_Entropy: 0.234
Policy_KL: 0.00815
Policy_SD: 0.526
Steps: 1.17e+04
TotalSteps: 4.31e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.979
VF_0_Loss : 0.00614


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0005   0.0003   0.0012   8.4490   2.3854   3.1146
ADVA:  (17136,) (35283,) 0.4856729869909021
ADV1:  8.278430907706323e-05 0.00014316621734477067 0.008356083659558661 0.33369327339235116 -0.13829869228444802
ADVB:  (20469,) (35283,) 0.5801377433891676
ADV2:  0.13554496379138423 0.34434985552895947 0.49326508344174

ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0010   0.0006   0.0025   8.4490   2.3854   3.1146
ADVA:  (20298,) (35039,) 0.5792973543765518
ADV1:  0.0 6.592348173296519e-05 0.01764597775897176 0.5485764734398354 -0.22072064505502298
ADVB:  (17398,) (35039,) 0.4965324352863952
ADV2:  0.0 0.2658113796267192 0.46805495473788017 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.1000   0.7310   3.5774 192.6540 133.1056  77.3677
***** Episode 116179, Mean R = -16.9  Std R = 9.7  Min R = -60.8
PolicyLoss: 1.58
Policy_Entropy: 0.235
Policy_KL: 0.0121
Policy_SD: 0.519
Steps: 1.17e+04
TotalSteps: 4.32e+07
VF_0_ExplainedVarNew: 0.987
VF_0_ExplainedVarOld: 0.946
VF_0_Loss : 0.00466


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0022   0.0013   0.0049   8.4490   2.3854   3.1146
ADVA:  (18204,) (35146,) 0.5179536789392819
ADV1:  0.0003334860541643128 0.0003502785175670926 0.0137216213141359 0.5485764734398354 -0.13828166019866184
ADVB:  (20035,) (35146,) 0.570050645877198
A

***** Episode 116396, Mean R = -16.1  Std R = 7.7  Min R = -50.8
PolicyLoss: 1.59
Policy_Entropy: 0.236
Policy_KL: 0.0127
Policy_SD: 0.519
Steps: 1.16e+04
TotalSteps: 4.33e+07
VF_0_ExplainedVarNew: 0.984
VF_0_ExplainedVarOld: 0.978
VF_0_Loss : 0.00559


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0008   0.0033   8.4490   2.3854   3.1146
ADVA:  (22114,) (34998,) 0.631864677981599
ADV1:  0.0 -0.0014005200833826237 0.011603518232421579 0.06521178214060347 -0.24809592134872505
ADVB:  (16221,) (34998,) 0.4634836276358649
ADV2:  0.0 0.22260486120295353 0.4149833119854579 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5542   0.5619   2.6930 192.6540 133.1056  77.3677
***** Episode 116427, Mean R = -17.4  Std R = 8.1  Min R = -45.3
PolicyLoss: 1.41
Policy_Entropy: 0.235
Policy_KL: 0.0188
Policy_SD: 0.527
Steps: 1.17e+04
TotalSteps: 4.33e+07
VF_0_ExplainedVarNew: 0.985
VF_0_ExplainedVarOld: 0.971
VF_0_Loss : 0.00569


ValFun  Gradients: u/sd/Max/C Max/Max u/Max s

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   2.5833   1.0992   5.1615 192.6540 133.1056  77.3677
***** Episode 116644, Mean R = -15.1  Std R = 4.9  Min R = -26.1
PolicyLoss: 1.38
Policy_Entropy: 0.235
Policy_KL: 0.00807
Policy_SD: 0.523
Steps: 1.18e+04
TotalSteps: 4.34e+07
VF_0_ExplainedVarNew: 0.986
VF_0_ExplainedVarOld: 0.985
VF_0_Loss : 0.00568


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0006   0.0003   0.0012   8.4490   2.3854   3.1146
ADVA:  (20459,) (35520,) 0.5759853603603604
ADV1:  0.0 -0.0013526968620654375 0.008383767101744412 0.06474275449285832 -0.07261618003613551
ADVB:  (18188,) (35520,) 0.5120495495495495
ADV2:  0.01831634592018267 0.25974668197866635 0.4187974183380955 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3965   0.5650   2.8095 192.6540 133.1056  77.3677
***** Episode 116675, Mean R = -17.3  Std R = 6.2  Min R = -33.4
PolicyLoss: 1.48
Policy_Entropy: 0.234
Policy_KL: 0.0115
Policy_SD: 0.521
Steps: 1.18e+04
TotalSteps: 4.34e+07
VF

w_f      |    0.00   -0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.03   -0.04 |    0.04    0.02    0.01
w_rewards |   -0.17 |    0.13 |   -0.65 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.23 |    0.30 |    0.00 |    3.08
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.95   -0.93 |    1.00    0.94
cs_angles |  0.0025  0.0006 |  0.0780  0.0775 | -0.9492 -0.9286 |  1.0000  0.9428
optical_flow |  0.0001  0.0001 |  0.0186  0.0175 | -0.9515 -1.0184 |  1.2000  1.0245
v_err    | -0.0117 |  0.0612 | -0.4532 |  0.2203
landing_rewards |    9.52 |    2.15 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.01
tracking_rewards |  -20.72 |    5.03 |  -41.99 |  -13.06
steps    |     379 |      20 |     336 |     420
***** Episode 116923, Mean R = -15.5  Std R = 5.6  Min R = -30.4
PolicyLoss: 1.91
Policy_Entropy: 0.235
Policy_KL: 0.0138
Policy_SD: 0.519
Steps: 1.17e+04
TotalSteps: 

Update Cnt = 3780    ET =   1201.4   Stats:  Mean, Std, Min, Max
r_f      |   -2.28    0.18    4.24 |  183.61  177.92  203.06 | -388.67 -374.86 -382.24 |  396.63  387.81  395.30
v_f      |    0.00    0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.10   -0.10 |    0.10    0.09    0.09
r_i      |  -21.08   -2.71  -19.10 |  686.53  678.51  750.04 |-1219.23-1289.60-1268.55 | 1315.09 1271.10 1305.11
v_i      |    0.00    0.00   -0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.10    0.09
norm_rf  |    0.14 |    0.07 |    0.02 |    0.45
norm_vf  |    0.08 |    0.01 |    0.04 |    0.13
gs_f     |    1.18 |    1.68 |    0.01 |   14.52
thrust   |    0.00    0.00   -0.00 |    0.65    0.65    0.65 |   -3.42   -3.41   -3.34 |    3.46    3.35    3.36
norm_thrust |    0.87 |    0.72 |    0.00 |    3.46
fuel     |    1.47 |    0.16 |    1.07 |    2.10
rewards  |  -15.70 |    5.58 |  -56.79 |   -6.41
fuel_rewards |   -4.22 |    0.47 |   -6.00 |   -3.06
glideslope_rewards |

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0700   0.3711   1.9632 192.6540 133.1056  77.3677
***** Episode 117481, Mean R = -17.3  Std R = 6.7  Min R = -38.9
PolicyLoss: 1.59
Policy_Entropy: 0.235
Policy_KL: 0.00913
Policy_SD: 0.535
Steps: 1.18e+04
TotalSteps: 4.37e+07
VF_0_ExplainedVarNew: 0.989
VF_0_ExplainedVarOld: 0.986
VF_0_Loss : 0.00645


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0035   0.0020   0.0075   8.4490   2.3854   3.1146
ADVA:  (20240,) (35263,) 0.5739727192808326
ADV1:  0.001985836355201524 0.0013297267990923364 0.00965875369456128 0.18810682779198062 -0.22270583862421167
ADVB:  (24134,) (35263,) 0.6844000794033406
ADV2:  0.30235167288755516 0.44734398238915085 0.5336988549838199 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.5570   1.1788   6.0019 192.6540 133.1056  77.3677
***** Episode 117512, Mean R = -15.4  Std R = 6.1  Min R = -38.9
PolicyLoss: 1.92
Policy_Entropy: 0.235
Policy_KL: 0.0114
Policy_SD: 0.531
Steps: 1.18e+04
TotalSte

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   5.1253   2.5912  10.2052 192.6540 133.1056  77.3677
***** Episode 117729, Mean R = -14.7  Std R = 5.1  Min R = -29.5
PolicyLoss: 2.19
Policy_Entropy: 0.234
Policy_KL: 0.00782
Policy_SD: 0.527
Steps: 1.18e+04
TotalSteps: 4.38e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00629


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0013   0.0007   0.0033   8.4490   2.3854   3.1146
ADVA:  (19860,) (35245,) 0.5634841821534969
ADV1:  0.00040952304890551176 0.00011608956675266991 0.008354441533155144 0.15757630334714845 -0.08445660741126271
ADVB:  (19657,) (35245,) 0.5577244999290679
ADV2:  0.09832454453453801 0.3317573927858531 0.4851340663558122 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.3029   0.5305   2.6844 192.6540 133.1056  77.3677
***** Episode 117760, Mean R = -17.6  Std R = 6.6  Min R = -30.7
PolicyLoss: 1.74
Policy_Entropy: 0.235
Policy_KL: 0.00841
Policy_SD: 0.529
Steps: 1.16e+04
Total

 *** BROKE ***   18 1.0142008066177368
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  43.1014  38.9127 159.5030 192.6540 133.1056  77.3677
***** Episode 117977, Mean R = -15.4  Std R = 5.3  Min R = -29.9
PolicyLoss: 2.08
Policy_Entropy: 0.228
Policy_KL: 1.01
Policy_SD: 0.525
Steps: 1.17e+04
TotalSteps: 4.39e+07
VF_0_ExplainedVarNew: 0.995
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00661


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0004   0.0002   0.0010   8.4490   2.3854   3.1146
ADVA:  (18583,) (35323,) 0.5260878181354924
ADV1:  0.0 0.00011224691887216458 0.006313993248364885 0.1220790874433284 -0.10432765416458323
ADVB:  (18861,) (35323,) 0.5339580443337203
ADV2:  0.06625111627140695 0.3690797475724896 0.5507379602352959 3.0 0.0
 *** BROKE ***   0 1.996217966079712
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd : 149.1984   0.0000 149.1984 192.6540 149.1984  77.3677
***** Episode 118008, Mean R = -15.9  Std R = 5.7  Min R = -33.1
PolicyLoss: 2.41
Policy_Entropy: 0.228
Pol

ADVA:  (20581,) (34843,) 0.5906781850012915
ADV1:  0.0021242133596138663 0.0008861162166632386 0.008844989120186506 0.04957961152841456 -0.08696021798101072
ADVB:  (24670,) (34843,) 0.7080331773957467
ADV2:  0.348117564315868 0.4427482460299991 0.4754995514372417 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.6666   0.2420   1.1335 192.6540 149.1984  77.3677
***** Episode 118225, Mean R = -14.6  Std R = 3.9  Min R = -25.6
PolicyLoss: 1.85
Policy_Entropy: 0.234
Policy_KL: 0.00467
Policy_SD: 0.522
Steps: 1.16e+04
TotalSteps: 4.4e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00772


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0027   8.4490   2.3854   3.1146
ADVA:  (18347,) (34865,) 0.5262297432955686
ADV1:  0.0014916532313989945 0.0009454965090464105 0.007768770808546569 0.08825908660333492 -0.08465704983176459
ADVB:  (23557,) (34865,) 0.6756632726229743
ADV2:  0.3501711572755595 0.4943410516317197 0.5561730225046495 3.0 

seeker_angles |   -0.00    0.00 |    0.08    0.08 |   -0.99   -0.97 |    0.99    0.98
cs_angles | -0.0009  0.0037 |  0.0774  0.0799 | -0.9902 -0.9691 |  0.9929  0.9789
optical_flow | -0.0000  0.0001 |  0.0181  0.0192 | -0.9883 -0.8993 |  1.1338  0.9614
v_err    | -0.0118 |  0.0607 | -0.4526 |  0.1024
landing_rewards |    9.61 |    1.93 |    0.00 |   10.00
landing_margin |   -0.02 |    0.01 |   -0.06 |    0.03
tracking_rewards |  -20.52 |    4.63 |  -46.64 |  -13.20
steps    |     378 |      21 |     328 |     416
***** Episode 118473, Mean R = -13.8  Std R = 3.8  Min R = -25.7
PolicyLoss: 1.64
Policy_Entropy: 0.235
Policy_KL: 0.00882
Policy_SD: 0.523
Steps: 1.18e+04
TotalSteps: 4.41e+07
VF_0_ExplainedVarNew: 0.991
VF_0_ExplainedVarOld: 0.99
VF_0_Loss : 0.00674


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0024   0.0015   0.0054   8.4490   2.3854   3.1146
ADVA:  (19120,) (35370,) 0.5405711054566016
ADV1:  0.000865589598220683 0.00026461570631440215 0.007956605446320009 0.052852

attitude |    0.02    0.06   -0.13 |    1.20    0.67    1.83 |   -3.14   -1.57   -3.14 |    3.14    1.55    3.14
w        |    0.00   -0.00    0.00 |    0.01    0.01    0.01 |   -0.06   -0.07   -0.05 |    0.07    0.05    0.06
a_f      |    0.06   -0.16 |    0.66    1.84 |   -1.51   -3.14 |    1.40    3.14
w_f      |    0.01    0.00   -0.00 |    0.01    0.01    0.01 |   -0.01   -0.03   -0.04 |    0.04    0.03    0.01
w_rewards |   -0.17 |    0.12 |   -0.63 |    0.00
w_penalty |    0.00 |    0.00 |    0.00 |    0.00
fov_penalty |    0.00 |    0.00 |    0.00 |    0.00
theta_cv |    0.24 |    0.30 |    0.00 |    2.86
seeker_angles |    0.00    0.00 |    0.08    0.08 |   -0.96   -0.97 |    0.99    0.98
cs_angles |  0.0006  0.0010 |  0.0813  0.0810 | -0.9577 -0.9749 |  0.9902  0.9819
optical_flow |  0.0000  0.0001 |  0.0190  0.0207 | -1.0583 -1.0831 |  1.4159  0.9852
v_err    | -0.0119 |  0.0612 | -0.4570 |  0.1401
landing_rewards |    9.58 |    2.00 |    0.00 |   10.00
landing_margin |   -0

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.5446   0.6409   3.2170 192.6540 149.1984  77.3677
Update Cnt = 3840    ET =   1560.5   Stats:  Mean, Std, Min, Max
r_f      |    3.44   10.76    4.70 |  187.21  168.56  198.53 | -400.87 -392.92 -388.91 |  389.31  383.57  377.50
v_f      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.09    0.09    0.10
r_i      |   15.96   30.24   14.13 |  669.60  662.04  775.07 |-1337.74-1252.53-1303.24 | 1264.54 1342.15 1369.26
v_i      |   -0.00   -0.00    0.00 |    0.04    0.04    0.05 |   -0.09   -0.09   -0.10 |    0.10    0.09    0.09
norm_rf  |    0.14 |    0.07 |    0.02 |    0.45
norm_vf  |    0.07 |    0.01 |    0.04 |    0.11
gs_f     |    1.52 |    2.98 |    0.01 |   38.66
thrust   |    0.01   -0.00   -0.00 |    0.64    0.64    0.65 |   -3.37   -3.40   -3.42 |    3.30    3.32    3.36
norm_thrust |    0.86 |    0.71 |    0.00 |    3.46
fuel     |    1.47 |    0.17 |    1.09 |    2.02
rewards  |  -14.90 

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0526   0.4786   2.0426 192.6540 149.1984  77.3677
***** Episode 119341, Mean R = -14.2  Std R = 5.5  Min R = -32.5
PolicyLoss: 1.36
Policy_Entropy: 0.236
Policy_KL: 0.011
Policy_SD: 0.524
Steps: 1.18e+04
TotalSteps: 4.44e+07
VF_0_ExplainedVarNew: 0.994
VF_0_ExplainedVarOld: 0.993
VF_0_Loss : 0.00927


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0015   0.0009   0.0036   8.4490   2.3854   3.1146
ADVA:  (20346,) (35094,) 0.5797572234570012
ADV1:  0.0 -0.00033186054984633113 0.007717457471231196 0.08461070951439786 -0.09645732399620915
ADVB:  (17316,) (35094,) 0.49341767823559585
ADV2:  0.0 0.28550057589622324 0.46984845238111483 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.2652   0.4061   2.0732 192.6540 149.1984  77.3677
***** Episode 119372, Mean R = -15.2  Std R = 4.6  Min R = -25.5
PolicyLoss: 1.68
Policy_Entropy: 0.237
Policy_KL: 0.0102
Policy_SD: 0.52
Steps: 1.18e+04
TotalSteps: 4.44e+07
VF_0_ExplainedVarN

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :  18.3857   9.6811  33.2261 192.6540 149.1984  77.3677
***** Episode 119589, Mean R = -14.0  Std R = 4.4  Min R = -25.9
PolicyLoss: 1.7
Policy_Entropy: 0.234
Policy_KL: 0.009
Policy_SD: 0.524
Steps: 1.17e+04
TotalSteps: 4.45e+07
VF_0_ExplainedVarNew: 0.993
VF_0_ExplainedVarOld: 0.992
VF_0_Loss : 0.00596


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0012   0.0007   0.0031   8.4490   2.3854   3.1146
ADVA:  (20227,) (35089,) 0.5764484596312235
ADV1:  0.0 -0.0005518479317655126 0.0077333820659907975 0.060718210163408226 -0.10502596144432635
ADVB:  (17753,) (35089,) 0.5059420331157912
ADV2:  0.00930468008390437 0.29448756939160814 0.48377671683054596 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   3.5730   1.5378   6.9620 192.6540 149.1984  77.3677
***** Episode 119620, Mean R = -15.7  Std R = 6.7  Min R = -30.7
PolicyLoss: 1.71
Policy_Entropy: 0.234
Policy_KL: 0.00735
Policy_SD: 0.531
Steps: 1.16e+04
TotalSteps: 4.45e+07
V

Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.9480   0.2962   1.5013 192.6540 149.1984  77.3677
***** Episode 119837, Mean R = -14.9  Std R = 3.7  Min R = -22.5
PolicyLoss: 1.8
Policy_Entropy: 0.236
Policy_KL: 0.00804
Policy_SD: 0.526
Steps: 1.18e+04
TotalSteps: 4.46e+07
VF_0_ExplainedVarNew: 0.996
VF_0_ExplainedVarOld: 0.995
VF_0_Loss : 0.00689


ValFun  Gradients: u/sd/Max/C Max/Max u/Max sd :   0.0014   0.0008   0.0030   8.4490   2.3854   3.1146
ADVA:  (18314,) (35181,) 0.520565077740826
ADV1:  0.0002133858850168393 0.0005616844681885478 0.006545302990818475 0.0680331067947203 -0.08705691372483526
ADVB:  (19650,) (35181,) 0.5585401210880874
ADV2:  0.1297984275147339 0.4326029688161379 0.6008008604454782 3.0 0.0
Policy  Gradients: u/sd/Max/C Max/Max u/Max sd :   1.0341   0.4101   2.0578 192.6540 149.1984  77.3677
***** Episode 119868, Mean R = -15.0  Std R = 5.5  Min R = -26.5
PolicyLoss: 2.27
Policy_Entropy: 0.235
Policy_KL: 0.00987
Policy_SD: 0.528
Steps: 1.17e+04
TotalSteps

In [5]:
# Save the policy produced by the optimization run above.
# `fname` is the name handed to policy.save_params; it is kept as a variable
# in the notebook namespace rather than inlined into the call.
# NOTE(review): presumably save_params writes the policy's parameters to a
# file derived from this name (relative to the kernel's working directory) —
# confirm against the Policy class, which is not visible in this cell.
fname = "optimize_WATTVW_FOV-AR=5-AWR-RPT1"
policy.save_params(fname)
