In [1]:
from tensorflow.keras import backend as K
from tensorflow.keras import activations, initializers
from tensorflow.keras.layers import Layer

import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions
tfp.__version__

'0.8.0'

In [2]:
def bnn_extractor(flat_observations, net_arch, act_fun):
    """
    Constructs an variational layer that receives observations as an input and outputs a latent representation for the policy and
    a value network. The ``net_arch`` parameter allows to specify the amount and size of the hidden layers and how many
    of them are shared between the policy network and the value network. It is assumed to be a list with the following
    structure:
    1. An arbitrary length (zero allowed) number of integers each specifying the number of units in a shared layer.
       If the number of ints is zero, there will be no shared layers.
    2. An optional dict, to specify the following non-shared layers for the value network and the policy network.
       It is formatted like ``dict(vf=[<value layer sizes>], pi=[<policy layer sizes>])``.
       If it is missing any of the keys (pi or vf), no non-shared layers (empty list) is assumed.
    For example to construct a network with one shared layer of size 55 followed by two non-shared layers for the value
    network of size 255 and a single non-shared layer of size 128 for the policy network, the following layers_spec
    would be used: ``[55, dict(vf=[255, 255], pi=[128])]``. A simple shared network topology with two layers of size 128
    would be specified as [128, 128].
    :param flat_observations: (tf.Tensor) The observations to base policy and value function on.
    :param net_arch: ([int or dict]) The specification of the policy and value networks.
        See above for details on its formatting.
    :param act_fun: (tf function) The activation function to use for the networks.
    :return: (tf.Tensor, tf.Tensor) latent_policy, latent_value of the specified network.
        If all layers are shared, then ``latent_policy == latent_value``
    """
    latent = flat_observations
    policy_only_layers = []  # Layer sizes of the network that only belongs to the policy network
    value_only_layers = []  # Layer sizes of the network that only belongs to the value network
    kernel_divergence_fn=lambda q, p, _: tfp.distributions.kl_divergence(q, p)

    # Iterate through the shared layers and build the shared parts of the network
    for idx, layer in enumerate(net_arch):
        if isinstance(layer, int):  # Check that this is a shared layer
            layer_size = layer
#             latent = act_fun(linear(latent, "shared_fc{}".format(idx), layer_size, init_scale=np.sqrt(2)))
            latent = act_fun(tfp.layers.DenseFlipout(layer_size, activation = 'relu', kernel_divergence_fn=kernel_divergence_fn)(latent))
        else:
            assert isinstance(layer, dict), "Error: the net_arch list can only contain ints and dicts"
            if 'pi' in layer:
                assert isinstance(layer['pi'], list), "Error: net_arch[-1]['pi'] must contain a list of integers."
                policy_only_layers = layer['pi']

            if 'vf' in layer:
                assert isinstance(layer['vf'], list), "Error: net_arch[-1]['vf'] must contain a list of integers."
                value_only_layers = layer['vf']
            break  # From here on the network splits up in policy and value network

    # Build the non-shared part of the network
    latent_policy = latent
    latent_value = latent
    for idx, (pi_layer_size, vf_layer_size) in enumerate(zip_longest(policy_only_layers, value_only_layers)):
        if pi_layer_size is not None:
            assert isinstance(pi_layer_size, int), "Error: net_arch[-1]['pi'] must only contain integers."
#             latent_policy = act_fun(linear(latent_policy, "pi_fc{}".format(idx), pi_layer_size, init_scale=np.sqrt(2)))
            latent_policy = act_fun(tfp.layers.DenseFlipout(pi_layer_size, activation = 'relu', kernel_divergence_fn=kernel_divergence_fn)(latent))

        if vf_layer_size is not None:
            assert isinstance(vf_layer_size, int), "Error: net_arch[-1]['vf'] must only contain integers."
#             latent_value = act_fun(linear(latent_value, "vf_fc{}".format(idx), vf_layer_size, init_scale=np.sqrt(2)))
            latent_value = act_fun(tfp.layers.DenseFlipout(vf_layer_size, activation = 'relu', kernel_divergence_fn=kernel_divergence_fn)(latent))

    return latent_policy, latent_value

In [3]:
from stable_baselines.common.policies import ActorCriticPolicy, nature_cnn

class FeedForwardPolicy(ActorCriticPolicy):
    """
    Policy object that implements actor critic, using a feed forward neural network.
    :param sess: (TensorFlow session) The current TensorFlow session
    :param ob_space: (Gym Space) The observation space of the environment
    :param ac_space: (Gym Space) The action space of the environment
    :param n_env: (int) The number of environments to run
    :param n_steps: (int) The number of steps to run for each environment
    :param n_batch: (int) The number of batch to run (n_envs * n_steps)
    :param reuse: (bool) If the policy is reusable or not
    :param layers: ([int]) (deprecated, use net_arch instead) The size of the Neural network for the policy
        (if None, default to [64, 64])
    :param net_arch: (list) Specification of the actor-critic policy network architecture (see mlp_extractor
        documentation for details).
    :param act_fun: (tf.func) the activation function to use in the neural network.
    :param cnn_extractor: (function (TensorFlow Tensor, ``**kwargs``): (TensorFlow Tensor)) the CNN feature extraction
    :param feature_extraction: (str) The feature extraction type ("cnn" or "mlp")
    :param kwargs: (dict) Extra keyword arguments for the nature CNN feature extraction
    """

    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, layers=None, net_arch=None,
                 act_fun=tf.tanh, cnn_extractor=nature_cnn, feature_extraction="cnn", **kwargs):
        super(FeedForwardPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=reuse,
                                                scale=(feature_extraction == "cnn"))

        self._kwargs_check(feature_extraction, kwargs)

        if layers is not None:
            warnings.warn("Usage of the `layers` parameter is deprecated! Use net_arch instead "
                          "(it has a different semantics though).", DeprecationWarning)
            if net_arch is not None:
                warnings.warn("The new `net_arch` parameter overrides the deprecated `layers` parameter!",
                              DeprecationWarning)

        if net_arch is None:
            if layers is None:
                layers = [64, 64]
            net_arch = [dict(vf=layers, pi=layers)]

        with tf.variable_scope("model", reuse=reuse):
            if feature_extraction == "cnn":
                pi_latent = vf_latent = cnn_extractor(self.processed_obs, **kwargs)
            elif feature_extraction == "bnn":
                pi_latent, vf_latent = bnn_extractor(tf.layers.flatten(self.processed_obs), net_arch, act_fun)
            else:
                pi_latent, vf_latent = mlp_extractor(tf.layers.flatten(self.processed_obs), net_arch, act_fun)

            self._value_fn = linear(vf_latent, 'vf', 1)

            self._proba_distribution, self._policy, self.q_value = \
                self.pdtype.proba_distribution_from_latent(pi_latent, vf_latent, init_scale=0.01)

        self._setup_init()

    def step(self, obs, state=None, mask=None, deterministic=False):
        if deterministic:
            action, value, neglogp = self.sess.run([self.deterministic_action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        else:
            action, value, neglogp = self.sess.run([self.action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        return action, value, self.initial_state, neglogp

    def proba_step(self, obs, state=None, mask=None):
        return self.sess.run(self.policy_proba, {self.obs_ph: obs})

    def value(self, obs, state=None, mask=None):
        return self.sess.run(self.value_flat, {self.obs_ph: obs})

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [4]:
import warnings
from itertools import zip_longest
from abc import ABC, abstractmethod

import numpy as np
import tensorflow as tf
from gym.spaces import Discrete

from stable_baselines.common.tf_util import batch_to_seq, seq_to_batch
from stable_baselines.common.tf_layers import conv, linear, conv_to_fc, lstm
from stable_baselines.common.distributions import make_proba_dist_type, CategoricalProbabilityDistribution, \
    MultiCategoricalProbabilityDistribution, DiagGaussianProbabilityDistribution, BernoulliProbabilityDistribution
from stable_baselines.common.input import observation_input
from stable_baselines.common.policies import nature_cnn

In [5]:
class BnnPolicy(FeedForwardPolicy):
    """
    Policy object that implements actor critic, using a Bayesian neural net (2 layers of 64)
    :param sess: (TensorFlow session) The current TensorFlow session
    :param ob_space: (Gym Space) The observation space of the environment
    :param ac_space: (Gym Space) The action space of the environment
    :param n_env: (int) The number of environments to run
    :param n_steps: (int) The number of steps to run for each environment
    :param n_batch: (int) The number of batch to run (n_envs * n_steps)
    :param reuse: (bool) If the policy is reusable or not
    :param _kwargs: (dict) Extra keyword arguments for the nature CNN feature extraction
    """

    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **_kwargs):
        super(BnnPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse,
                                        feature_extraction="bnn", **_kwargs)

# DNN Cartpole

In [6]:
import os
import gym

from stable_baselines.ppo1 import PPO1
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import logger
from stable_baselines.common.callbacks import EvalCallback

NUM_TIMESTEPS = int(1e4)
SEED = 722
EVAL_FREQ = 250000
EVAL_EPISODES = 10  # was 1000

LOGDIR = "dnn_cartpole" # moved to zoo afterwards.
logger.configure(folder=LOGDIR)

env = gym.make("CartPole-v0")
env.seed(SEED)

Logging to dnn_cartpole


[722]

In [7]:
# take mujoco hyperparams (but doubled timesteps_per_actorbatch to cover more steps.)
dnn = PPO1(MlpPolicy, env, timesteps_per_actorbatch=4096, clip_param=0.2, entcoeff=0.0, optim_epochs=10,
                 optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99, lam=0.95, schedule='linear', verbose=2)

eval_callback = EvalCallback(env, best_model_save_path=LOGDIR, log_path=LOGDIR, eval_freq=EVAL_FREQ, n_eval_episodes=EVAL_EPISODES)

dnn.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)

dnn.save(os.path.join(LOGDIR, "final_model")) # probably never get to this point.

env.close()





Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Please use `layer.__call__` method instead.










Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
********** Iteration 0 ************


  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00983 |       0.00000 |      81.41195 |       0.00399 |       0.68922
     -0.01571 |       0.00000 |      68.77129 |       0.00820 |       0.68506
     -0.01768 |       0.00000 |      43.29578 |       0.00811 |       0.68514
     -0.01988 |       0.00000 |      21.73780 |       0.00893 |       0.68433
     -0.02295 |       0.00000 |      15.19730 |       0.01010 |       0.68317
     -0.02521 |       0.00000 |      13.72938 |       0.01152 |       0.68178
     -0.02703 |       0.00000 |      12.50361 |       0.01233 |       0.68098
     -0.02813 |       0.00000 |      11.25630 |       0.01356 |       0.67978
     -0.02929 |       0.00000 |      10.23180 |       0.01353 |       0.67980
     -0.02959 |       0.00000 |       9.52145 |       0.01405 |       0.67929
Evaluating losses...
     -0.03002 |       0.00000 |       9.25645 |       0.01531 |       0.67806
-----------------------------

# BNN Cartpole

In [8]:
NUM_TIMESTEPS = int(1e4)
SEED = 722
EVAL_FREQ = 250000
EVAL_EPISODES = 10  # was 1000

LOGDIR = "bnn_cartpole" # moved to zoo afterwards.
logger.configure(folder=LOGDIR)

env = gym.make("CartPole-v0")
env.seed(SEED)

Logging to bnn_cartpole


[722]

In [9]:
# take mujoco hyperparams (but doubled timesteps_per_actorbatch to cover more steps.)
bnn = PPO1(BnnPolicy, env, timesteps_per_actorbatch=4096, clip_param=0.2, entcoeff=0.0, optim_epochs=10,
                 optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99, lam=0.95, schedule='linear', verbose=2)

eval_callback = EvalCallback(env, best_model_save_path=LOGDIR, log_path=LOGDIR, eval_freq=EVAL_FREQ, n_eval_episodes=EVAL_EPISODES)

bnn.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)

bnn.save(os.path.join(LOGDIR, "final_model")) # probably never get to this point.

env.close()

Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
********** Iteration 0 ************


  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00126 |       0.00000 |      88.18262 |      7.79e-05 |       0.69307
     -0.00506 |       0.00000 |      85.07277 |       0.00076 |       0.69239
     -0.01154 |       0.00000 |      81.35635 |       0.00365 |       0.68954
     -0.01616 |       0.00000 |      76.96362 |       0.00741 |       0.68586
     -0.01699 |       0.00000 |      71.97631 |       0.00903 |       0.68428
     -0.01771 |       0.00000 |      66.60826 |       0.00836 |       0.68493
     -0.01911 |       0.00000 |      61.00874 |       0.00903 |       0.68426
     -0.02030 |       0.00000 |      55.32611 |       0.00957 |       0.68373
     -0.02151 |       0.00000 |      49.64088 |       0.00998 |       0.68332
     -0.02240 |       0.00000 |      44.18072 |       0.01026 |       0.68305
Evaluating losses...
     -0.02308 |       0.00000 |      41.50755 |       0.00988 |       0.68341
-----------------------------

# Performance

In [10]:
from stable_baselines.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(dnn, dnn.get_env(), n_eval_episodes=1000)
print(f"DNN - Mean reward: {mean_reward}, Std reward: {std_reward}")

mean_reward, std_reward = evaluate_policy(bnn, bnn.get_env(), n_eval_episodes=1000)
print(f"BNN - Mean reward: {mean_reward}, Std reward: {std_reward}")

DNN - Mean reward: 187.59, Std reward: 20.69627744305724
BNN - Mean reward: 172.414, Std reward: 27.614210182440488


In [57]:
from stable_baselines.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(dnn, dnn.get_env(), n_eval_episodes=1000)
print(f"DNN - Mean reward: {mean_reward}, Std reward: {std_reward}")

mean_reward, std_reward = evaluate_policy(bnn, bnn.get_env(), n_eval_episodes=1000)
print(f"BNN - Mean reward: {mean_reward}, Std reward: {std_reward}")

DNN - Mean reward: 175.151, Std reward: 38.99818199608797
BNN - Mean reward: 164.046, Std reward: 45.66565321989822


In [62]:
from stable_baselines.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(dnn, dnn.get_env(), n_eval_episodes=1000)
print(f"DNN - Mean reward: {mean_reward}, Std reward: {std_reward}")

mean_reward, std_reward = evaluate_policy(bnn, bnn.get_env(), n_eval_episodes=1000)
print(f"BNN - Mean reward: {mean_reward}, Std reward: {std_reward}")

DNN - Mean reward: 156.935, Std reward: 48.66798511341928
BNN - Mean reward: 189.304, Std reward: 18.416991719605022


In [67]:
from stable_baselines.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(dnn, dnn.get_env(), n_eval_episodes=1000)
print(f"DNN - Mean reward: {mean_reward}, Std reward: {std_reward}")

mean_reward, std_reward = evaluate_policy(bnn, bnn.get_env(), n_eval_episodes=1000)
print(f"BNN - Mean reward: {mean_reward}, Std reward: {std_reward}")

DNN - Mean reward: 161.503, Std reward: 47.323144348193935
BNN - Mean reward: 178.989, Std reward: 32.992982268961384


# Evalutaion

There are a few possible ways we can take this:
1. Small number of iterations on CartPole, to see which model performs better with less data.
2. Run experiment on Acrobot, to hopefully see BNN > DNN.
3. Hide 1 out of every N frames from the learning agent (self-play or single-agent), to hopefully see BNN > DNN.

For all of the above, run a hundred different initialisations using different random seeds, and evaluate using at least 1K episodes (the episodes have extremely high variance even with the same random seed)

In [18]:
import pandas as pd

#To store experiment results
df = pd.DataFrame(columns = ['Seed', 'BNN reward', 'BNN std', 'DNN reward', 'DNN std']) 

df.head()

Unnamed: 0,Seed,BNN reward,BNN std,DNN reward,DNN std


In [21]:
#Evaluation method one

import os
import gym

from stable_baselines.ppo1 import PPO1
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import logger
from stable_baselines.common.callbacks import EvalCallback
from stable_baselines.common.evaluation import evaluate_policy

NUM_TIMESTEPS = int(1e4)
SEED = 722
EVAL_FREQ = 250000
EVAL_EPISODES = 10  # was 1000

for SEED in range (600, 701):
    LOGDIR = "dnn_cartpole" # moved to zoo afterwards.
    logger.configure(folder=LOGDIR)
    
    env = gym.make("CartPole-v0")
    env.seed(SEED)

    # take mujoco hyperparams (but doubled timesteps_per_actorbatch to cover more steps.)
    dnn = PPO1(MlpPolicy, env, timesteps_per_actorbatch=4096, clip_param=0.2, entcoeff=0.0, optim_epochs=10,
                 optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99, lam=0.95, schedule='linear', verbose=2)

    eval_callback = EvalCallback(env, best_model_save_path=LOGDIR, log_path=LOGDIR, eval_freq=EVAL_FREQ, n_eval_episodes=EVAL_EPISODES)

    dnn.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)

    dnn.save(os.path.join(LOGDIR, "final_model")) # probably never get to this point.

    env.close()
    
    LOGDIR = "bnn_cartpole" # moved to zoo afterwards.
    logger.configure(folder=LOGDIR)

    env = gym.make("CartPole-v0")
    env.seed(SEED)
    
    # take mujoco hyperparams (but doubled timesteps_per_actorbatch to cover more steps.)
    bnn = PPO1(BnnPolicy, env, timesteps_per_actorbatch=4096, clip_param=0.2, entcoeff=0.0, optim_epochs=10,
                 optim_stepsize=3e-4, optim_batchsize=64, gamma=0.99, lam=0.95, schedule='linear', verbose=2)

    eval_callback = EvalCallback(env, best_model_save_path=LOGDIR, log_path=LOGDIR, eval_freq=EVAL_FREQ, n_eval_episodes=EVAL_EPISODES)

    bnn.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)

    bnn.save(os.path.join(LOGDIR, "final_model")) # probably never get to this point.

    env.close()
    
    mean_rewardDNN, std_rewardDNN = evaluate_policy(dnn, dnn.get_env(), n_eval_episodes=1000)
    print(f"DNN - Mean reward: {mean_rewardDNN}, Std reward: {std_rewardDNN}")

    mean_rewardBNN, std_rewardBNN = evaluate_policy(bnn, bnn.get_env(), n_eval_episodes=1000)
    print(f"BNN - Mean reward: {mean_rewardBNN}, Std reward: {std_rewardBNN}")
    
    data = {
    "Seed" : SEED,
    "BNN reward" : mean_rewardBNN,
    "BNN std" : std_rewardBNN,
    "DNN reward" : mean_rewardDNN,
    "DNN std" : std_rewardDNN
    }

    df = df.append(data, ignore_index=True)

    

Logging to dnn_cartpole
********** Iteration 0 ************


  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01104 |       0.00000 |      83.31959 |       0.00455 |       0.68846
     -0.01737 |       0.00000 |      70.39660 |       0.00789 |       0.68506
     -0.01893 |       0.00000 |      44.48908 |       0.00881 |       0.68414
     -0.02156 |       0.00000 |      22.42776 |       0.00901 |       0.68393
     -0.02429 |       0.00000 |      15.77996 |       0.01042 |       0.68252
     -0.02641 |       0.00000 |      14.44719 |       0.01168 |       0.68127
     -0.02701 |       0.00000 |      13.38486 |       0.01305 |       0.67991
     -0.02805 |       0.00000 |      12.24819 |       0.01337 |       0.67960
     -0.02847 |       0.00000 |      11.23798 |       0.01376 |       0.67921
     -0.02903 |       0.00000 |      10.48966 |       0.01386 |       0.67911
Evaluating losses...
     -0.02881 |       0.00000 |      10.17848 |       0.01603 |       0.67697
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00168 |       0.00000 |      93.04918 |       0.00010 |       0.69305
     -0.00627 |       0.00000 |      90.33788 |       0.00093 |       0.69221
     -0.01293 |       0.00000 |      87.07071 |       0.00436 |       0.68881
     -0.01600 |       0.00000 |      83.06079 |       0.00770 |       0.68553
     -0.01655 |       0.00000 |      78.25071 |       0.00884 |       0.68440
     -0.01734 |       0.00000 |      72.83126 |       0.00866 |       0.68458
     -0.01846 |       0.00000 |      67.07480 |       0.00882 |       0.68442
     -0.01998 |       0.00000 |      61.22139 |       0.00917 |       0.68406
     -0.02124 |       0.00000 |      55.44053 |       0.00942 |       0.68382
     -0.02289 |       0.00000 |      49.86128 |       0.01027 |       0.68297
Evaluating losses...
     -0.02381 |       0.00000 |      47.13252 |       0.01006 |       0.68318
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00831 |       0.00000 |      79.21246 |       0.00347 |       0.68986
     -0.01438 |       0.00000 |      66.12856 |       0.00804 |       0.68546
     -0.01619 |       0.00000 |      40.38298 |       0.00849 |       0.68502
     -0.01907 |       0.00000 |      20.21524 |       0.00905 |       0.68447
     -0.02272 |       0.00000 |      14.85998 |       0.01055 |       0.68300
     -0.02603 |       0.00000 |      13.72186 |       0.01193 |       0.68165
     -0.02789 |       0.00000 |      12.59174 |       0.01298 |       0.68062
     -0.02908 |       0.00000 |      11.32942 |       0.01344 |       0.68016
     -0.02968 |       0.00000 |      10.20894 |       0.01349 |       0.68011
     -0.02997 |       0.00000 |       9.40086 |       0.01393 |       0.67968
Evaluating losses...
     -0.03028 |       0.00000 |       9.09371 |       0.01264 |       0.68092
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00132 |       0.00000 |      87.21719 |       0.00013 |       0.69301
     -0.00562 |       0.00000 |      84.53976 |       0.00078 |       0.69235
     -0.01187 |       0.00000 |      81.48814 |       0.00370 |       0.68942
     -0.01646 |       0.00000 |      77.90315 |       0.00719 |       0.68598
     -0.01748 |       0.00000 |      73.66091 |       0.00859 |       0.68460
     -0.01823 |       0.00000 |      68.84760 |       0.00906 |       0.68413
     -0.01973 |       0.00000 |      63.66901 |       0.00900 |       0.68419
     -0.02108 |       0.00000 |      58.23838 |       0.00932 |       0.68387
     -0.02271 |       0.00000 |      52.80296 |       0.01061 |       0.68259
     -0.02435 |       0.00000 |      47.55459 |       0.01050 |       0.68270
Evaluating losses...
     -0.02501 |       0.00000 |      44.98935 |       0.01096 |       0.68224
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00671 |       0.00000 |      94.10288 |       0.00339 |       0.68969
     -0.01539 |       0.00000 |      80.57233 |       0.00742 |       0.68564
     -0.01633 |       0.00000 |      52.34605 |       0.00850 |       0.68456
     -0.01884 |       0.00000 |      26.87642 |       0.00842 |       0.68463
     -0.02208 |       0.00000 |      18.23467 |       0.00916 |       0.68389
     -0.02509 |       0.00000 |      16.41891 |       0.01111 |       0.68196
     -0.02714 |       0.00000 |      15.13486 |       0.01211 |       0.68097
     -0.02833 |       0.00000 |      13.73058 |       0.01296 |       0.68013
     -0.02925 |       0.00000 |      12.51557 |       0.01339 |       0.67972
     -0.02970 |       0.00000 |      11.65894 |       0.01338 |       0.67972
Evaluating losses...
     -0.03014 |       0.00000 |      11.33321 |       0.01316 |       0.67995
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00187 |       0.00000 |      78.97748 |       0.00016 |       0.69300
     -0.00719 |       0.00000 |      76.34558 |       0.00168 |       0.69152
     -0.01200 |       0.00000 |      73.13460 |       0.00499 |       0.68827
     -0.01538 |       0.00000 |      69.28011 |       0.00683 |       0.68646
     -0.01724 |       0.00000 |      64.83208 |       0.00834 |       0.68497
     -0.01816 |       0.00000 |      60.01022 |       0.00901 |       0.68431
     -0.01982 |       0.00000 |      54.93562 |       0.00914 |       0.68418
     -0.02103 |       0.00000 |      49.77633 |       0.00935 |       0.68397
     -0.02244 |       0.00000 |      44.69564 |       0.01034 |       0.68300
     -0.02365 |       0.00000 |      39.85924 |       0.01059 |       0.68275
Evaluating losses...
     -0.02441 |       0.00000 |      37.56202 |       0.00991 |       0.68341
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00836 |       0.00000 |      82.48325 |       0.00325 |       0.68983
     -0.01741 |       0.00000 |      69.66510 |       0.00836 |       0.68468
     -0.01912 |       0.00000 |      44.09813 |       0.00838 |       0.68467
     -0.02180 |       0.00000 |      22.33900 |       0.00954 |       0.68351
     -0.02466 |       0.00000 |      15.73257 |       0.01038 |       0.68267
     -0.02665 |       0.00000 |      14.31156 |       0.01175 |       0.68132
     -0.02806 |       0.00000 |      13.07666 |       0.01257 |       0.68051
     -0.02916 |       0.00000 |      11.81603 |       0.01315 |       0.67993
     -0.02978 |       0.00000 |      10.80821 |       0.01342 |       0.67967
     -0.03067 |       0.00000 |      10.15432 |       0.01374 |       0.67935
Evaluating losses...
     -0.03100 |       0.00000 |       9.89902 |       0.01457 |       0.67854
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00109 |       0.00000 |      93.84705 |      5.48e-05 |       0.69309
     -0.00466 |       0.00000 |      91.07639 |       0.00067 |       0.69247
     -0.01052 |       0.00000 |      87.49888 |       0.00304 |       0.69012
     -0.01484 |       0.00000 |      83.00015 |       0.00710 |       0.68612
     -0.01624 |       0.00000 |      77.72357 |       0.00820 |       0.68505
     -0.01691 |       0.00000 |      71.82268 |       0.00916 |       0.68411
     -0.01785 |       0.00000 |      65.54908 |       0.00928 |       0.68399
     -0.01912 |       0.00000 |      59.17629 |       0.00895 |       0.68430
     -0.02073 |       0.00000 |      52.93835 |       0.00953 |       0.68373
     -0.02251 |       0.00000 |      47.01973 |       0.01025 |       0.68301
Evaluating losses...
     -0.02325 |       0.00000 |      44.14791 |       0.01060 |       0.68267
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00830 |       0.00000 |      89.89194 |       0.00344 |       0.68954
     -0.01444 |       0.00000 |      76.85749 |       0.00797 |       0.68491
     -0.01585 |       0.00000 |      49.78482 |       0.00822 |       0.68466
     -0.01832 |       0.00000 |      25.55328 |       0.00842 |       0.68445
     -0.02140 |       0.00000 |      17.54012 |       0.00982 |       0.68306
     -0.02394 |       0.00000 |      15.83537 |       0.01154 |       0.68134
     -0.02544 |       0.00000 |      14.60463 |       0.01247 |       0.68042
     -0.02647 |       0.00000 |      13.25350 |       0.01312 |       0.67978
     -0.02721 |       0.00000 |      12.05355 |       0.01314 |       0.67977
     -0.02758 |       0.00000 |      11.20622 |       0.01381 |       0.67911
Evaluating losses...
     -0.02831 |       0.00000 |      10.84922 |       0.01375 |       0.67916
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00181 |       0.00000 |      83.62863 |      9.11e-05 |       0.69306
     -0.00788 |       0.00000 |      80.76478 |       0.00118 |       0.69198
     -0.01574 |       0.00000 |      77.57241 |       0.00505 |       0.68817
     -0.01905 |       0.00000 |      73.67188 |       0.00844 |       0.68485
     -0.01968 |       0.00000 |      69.07552 |       0.00966 |       0.68366
     -0.02038 |       0.00000 |      63.94521 |       0.00951 |       0.68380
     -0.02177 |       0.00000 |      58.56540 |       0.00934 |       0.68396
     -0.02322 |       0.00000 |      53.11386 |       0.00960 |       0.68369
     -0.02469 |       0.00000 |      47.79222 |       0.01014 |       0.68317
     -0.02634 |       0.00000 |      42.68106 |       0.01060 |       0.68270
Evaluating losses...
     -0.02664 |       0.00000 |      40.23510 |       0.01201 |       0.68133
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00947 |       0.00000 |      91.01717 |       0.00335 |       0.68990
     -0.01518 |       0.00000 |      78.08060 |       0.00834 |       0.68505
     -0.01679 |       0.00000 |      50.77236 |       0.00799 |       0.68539
     -0.01871 |       0.00000 |      25.76315 |       0.00836 |       0.68501
     -0.02195 |       0.00000 |      17.41224 |       0.00922 |       0.68416
     -0.02524 |       0.00000 |      15.76602 |       0.01113 |       0.68229
     -0.02751 |       0.00000 |      14.62425 |       0.01233 |       0.68110
     -0.02904 |       0.00000 |      13.36506 |       0.01297 |       0.68048
     -0.02991 |       0.00000 |      12.22252 |       0.01364 |       0.67982
     -0.03033 |       0.00000 |      11.36738 |       0.01401 |       0.67946
Evaluating losses...
     -0.03077 |       0.00000 |      11.03735 |       0.01286 |       0.68058
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00084 |       0.00000 |      88.45740 |      3.80e-05 |       0.69311
     -0.00350 |       0.00000 |      85.66366 |       0.00047 |       0.69269
     -0.00841 |       0.00000 |      82.34449 |       0.00199 |       0.69120
     -0.01374 |       0.00000 |      78.22843 |       0.00562 |       0.68761
     -0.01595 |       0.00000 |      73.41665 |       0.00788 |       0.68539
     -0.01642 |       0.00000 |      68.12473 |       0.00821 |       0.68507
     -0.01737 |       0.00000 |      62.56273 |       0.00889 |       0.68440
     -0.01863 |       0.00000 |      56.90676 |       0.00847 |       0.68480
     -0.02030 |       0.00000 |      51.30700 |       0.00920 |       0.68408
     -0.02165 |       0.00000 |      45.88834 |       0.01019 |       0.68310
Evaluating losses...
     -0.02282 |       0.00000 |      43.20287 |       0.00991 |       0.68337
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01049 |       0.00000 |      84.72376 |       0.00474 |       0.68845
     -0.01652 |       0.00000 |      72.21685 |       0.00752 |       0.68569
     -0.01829 |       0.00000 |      45.91370 |       0.00810 |       0.68512
     -0.02074 |       0.00000 |      22.95880 |       0.00871 |       0.68451
     -0.02307 |       0.00000 |      15.91994 |       0.01007 |       0.68317
     -0.02547 |       0.00000 |      14.46792 |       0.01144 |       0.68182
     -0.02702 |       0.00000 |      13.24975 |       0.01234 |       0.68093
     -0.02818 |       0.00000 |      11.92383 |       0.01304 |       0.68024
     -0.02865 |       0.00000 |      10.84656 |       0.01327 |       0.68001
     -0.02935 |       0.00000 |      10.09756 |       0.01394 |       0.67935
Evaluating losses...
     -0.02966 |       0.00000 |       9.82160 |       0.01307 |       0.68020
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00146 |       0.00000 |      94.97602 |       0.00016 |       0.69298
     -0.00641 |       0.00000 |      91.94418 |       0.00143 |       0.69170
     -0.01261 |       0.00000 |      88.45521 |       0.00508 |       0.68808
     -0.01593 |       0.00000 |      84.28053 |       0.00725 |       0.68594
     -0.01716 |       0.00000 |      79.40884 |       0.00867 |       0.68455
     -0.01809 |       0.00000 |      73.93264 |       0.00905 |       0.68418
     -0.01920 |       0.00000 |      68.00504 |       0.00899 |       0.68422
     -0.02029 |       0.00000 |      61.88317 |       0.00894 |       0.68427
     -0.02173 |       0.00000 |      55.75229 |       0.00956 |       0.68365
     -0.02298 |       0.00000 |      49.82974 |       0.01030 |       0.68293
Evaluating losses...
     -0.02372 |       0.00000 |      46.92803 |       0.01021 |       0.68301
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01119 |       0.00000 |      84.16708 |       0.00454 |       0.68853
     -0.01684 |       0.00000 |      71.53416 |       0.00859 |       0.68445
     -0.01826 |       0.00000 |      45.59304 |       0.00793 |       0.68510
     -0.02046 |       0.00000 |      23.20214 |       0.00905 |       0.68397
     -0.02324 |       0.00000 |      16.10640 |       0.01034 |       0.68269
     -0.02533 |       0.00000 |      14.66428 |       0.01183 |       0.68121
     -0.02650 |       0.00000 |      13.65542 |       0.01275 |       0.68030
     -0.02693 |       0.00000 |      12.51706 |       0.01357 |       0.67949
     -0.02791 |       0.00000 |      11.44582 |       0.01368 |       0.67939
     -0.02827 |       0.00000 |      10.62386 |       0.01396 |       0.67911
Evaluating losses...
     -0.02869 |       0.00000 |      10.28435 |       0.01323 |       0.67983
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00105 |       0.00000 |      89.86337 |      8.34e-05 |       0.69306
     -0.00442 |       0.00000 |      87.17561 |       0.00063 |       0.69250
     -0.00989 |       0.00000 |      84.04006 |       0.00317 |       0.68996
     -0.01414 |       0.00000 |      80.20712 |       0.00705 |       0.68613
     -0.01529 |       0.00000 |      75.60359 |       0.00877 |       0.68443
     -0.01645 |       0.00000 |      70.33739 |       0.00841 |       0.68478
     -0.01776 |       0.00000 |      64.72981 |       0.00922 |       0.68399
     -0.01963 |       0.00000 |      58.98057 |       0.00925 |       0.68395
     -0.02112 |       0.00000 |      53.25884 |       0.00995 |       0.68325
     -0.02275 |       0.00000 |      47.78210 |       0.01040 |       0.68280
Evaluating losses...
     -0.02328 |       0.00000 |      45.12704 |       0.01022 |       0.68299
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00874 |       0.00000 |      78.49369 |       0.00418 |       0.68915
     -0.01477 |       0.00000 |      65.76054 |       0.00798 |       0.68548
     -0.01640 |       0.00000 |      40.64634 |       0.00852 |       0.68495
     -0.01875 |       0.00000 |      20.66547 |       0.00920 |       0.68428
     -0.02115 |       0.00000 |      15.23471 |       0.01053 |       0.68298
     -0.02359 |       0.00000 |      14.01829 |       0.01142 |       0.68211
     -0.02513 |       0.00000 |      12.85858 |       0.01249 |       0.68106
     -0.02631 |       0.00000 |      11.68509 |       0.01299 |       0.68057
     -0.02688 |       0.00000 |      10.71957 |       0.01346 |       0.68011
     -0.02676 |       0.00000 |      10.05666 |       0.01396 |       0.67962
Evaluating losses...
     -0.02697 |       0.00000 |       9.78788 |       0.01571 |       0.67793
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00169 |       0.00000 |      92.45570 |       0.00012 |       0.69302
     -0.00686 |       0.00000 |      89.53152 |       0.00109 |       0.69203
     -0.01417 |       0.00000 |      86.03674 |       0.00474 |       0.68839
     -0.01663 |       0.00000 |      81.70354 |       0.00846 |       0.68472
     -0.01732 |       0.00000 |      76.46985 |       0.00837 |       0.68481
     -0.01792 |       0.00000 |      70.61603 |       0.00936 |       0.68383
     -0.01834 |       0.00000 |      64.45230 |       0.00882 |       0.68436
     -0.01909 |       0.00000 |      58.12881 |       0.00890 |       0.68428
     -0.01981 |       0.00000 |      51.88945 |       0.00896 |       0.68422
     -0.02041 |       0.00000 |      45.97902 |       0.00939 |       0.68380
Evaluating losses...
     -0.02100 |       0.00000 |      43.07516 |       0.00932 |       0.68387
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00939 |       0.00000 |      87.66176 |       0.00479 |       0.68843
     -0.01704 |       0.00000 |      75.13103 |       0.00770 |       0.68556
     -0.01878 |       0.00000 |      48.78662 |       0.00896 |       0.68432
     -0.02244 |       0.00000 |      24.76236 |       0.00918 |       0.68409
     -0.02576 |       0.00000 |      16.77304 |       0.01080 |       0.68249
     -0.02873 |       0.00000 |      15.08194 |       0.01185 |       0.68146
     -0.03060 |       0.00000 |      13.68200 |       0.01280 |       0.68053
     -0.03184 |       0.00000 |      12.16458 |       0.01333 |       0.68000
     -0.03254 |       0.00000 |      10.88465 |       0.01376 |       0.67959
     -0.03256 |       0.00000 |      10.03540 |       0.01401 |       0.67935
Evaluating losses...
     -0.03324 |       0.00000 |       9.71363 |       0.01323 |       0.68010
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00129 |       0.00000 |      94.78271 |       0.00010 |       0.69306
     -0.00576 |       0.00000 |      92.21814 |       0.00094 |       0.69226
     -0.01291 |       0.00000 |      89.11326 |       0.00419 |       0.68909
     -0.01740 |       0.00000 |      85.14893 |       0.00791 |       0.68546
     -0.01848 |       0.00000 |      80.29318 |       0.00841 |       0.68496
     -0.01940 |       0.00000 |      74.80145 |       0.00937 |       0.68404
     -0.02026 |       0.00000 |      68.84944 |       0.00920 |       0.68419
     -0.02169 |       0.00000 |      62.61634 |       0.00941 |       0.68398
     -0.02340 |       0.00000 |      56.40451 |       0.00962 |       0.68377
     -0.02546 |       0.00000 |      50.41111 |       0.01033 |       0.68307
Evaluating losses...
     -0.02615 |       0.00000 |      47.49701 |       0.01062 |       0.68279
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01037 |       0.00000 |      89.14943 |       0.00388 |       0.68914
     -0.01648 |       0.00000 |      75.54720 |       0.00856 |       0.68442
     -0.01811 |       0.00000 |      47.68260 |       0.00838 |       0.68459
     -0.02100 |       0.00000 |      24.23354 |       0.00880 |       0.68417
     -0.02422 |       0.00000 |      17.25403 |       0.01009 |       0.68288
     -0.02669 |       0.00000 |      15.78735 |       0.01172 |       0.68126
     -0.02836 |       0.00000 |      14.57989 |       0.01285 |       0.68015
     -0.02926 |       0.00000 |      13.33678 |       0.01369 |       0.67932
     -0.02976 |       0.00000 |      12.24130 |       0.01371 |       0.67930
     -0.03025 |       0.00000 |      11.45214 |       0.01421 |       0.67880
Evaluating losses...
     -0.03077 |       0.00000 |      11.13293 |       0.01366 |       0.67935
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00185 |       0.00000 |      89.27676 |       0.00011 |       0.69303
     -0.00732 |       0.00000 |      86.74909 |       0.00110 |       0.69204
     -0.01486 |       0.00000 |      83.65908 |       0.00479 |       0.68838
     -0.01828 |       0.00000 |      79.74551 |       0.00794 |       0.68528
     -0.01912 |       0.00000 |      75.02397 |       0.00908 |       0.68416
     -0.02009 |       0.00000 |      69.66121 |       0.00944 |       0.68380
     -0.02099 |       0.00000 |      63.88931 |       0.00905 |       0.68418
     -0.02261 |       0.00000 |      57.94059 |       0.00946 |       0.68377
     -0.02404 |       0.00000 |      52.03770 |       0.01025 |       0.68299
     -0.02550 |       0.00000 |      46.08271 |       0.01064 |       0.68260
Evaluating losses...
     -0.02575 |       0.00000 |      43.10250 |       0.01103 |       0.68222
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01254 |       0.00000 |      94.58330 |       0.00504 |       0.68790
     -0.01946 |       0.00000 |      81.21828 |       0.00853 |       0.68432
     -0.02144 |       0.00000 |      53.05435 |       0.00840 |       0.68445
     -0.02462 |       0.00000 |      27.59734 |       0.00924 |       0.68361
     -0.02854 |       0.00000 |      18.96503 |       0.01070 |       0.68215
     -0.03107 |       0.00000 |      17.11581 |       0.01204 |       0.68082
     -0.03218 |       0.00000 |      15.84052 |       0.01326 |       0.67961
     -0.03313 |       0.00000 |      14.42817 |       0.01367 |       0.67921
     -0.03364 |       0.00000 |      13.15708 |       0.01385 |       0.67903
     -0.03370 |       0.00000 |      12.19933 |       0.01438 |       0.67851
Evaluating losses...
     -0.03426 |       0.00000 |      11.81378 |       0.01279 |       0.68009
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00154 |       0.00000 |      88.14616 |      9.86e-05 |       0.69304
     -0.00701 |       0.00000 |      85.24597 |       0.00106 |       0.69206
     -0.01451 |       0.00000 |      81.92001 |       0.00506 |       0.68809
     -0.01755 |       0.00000 |      78.09380 |       0.00834 |       0.68485
     -0.01812 |       0.00000 |      73.71841 |       0.00889 |       0.68430
     -0.01890 |       0.00000 |      68.74558 |       0.00940 |       0.68380
     -0.02024 |       0.00000 |      63.35691 |       0.00872 |       0.68446
     -0.02121 |       0.00000 |      57.71946 |       0.00941 |       0.68378
     -0.02250 |       0.00000 |      52.02748 |       0.01003 |       0.68316
     -0.02392 |       0.00000 |      46.53712 |       0.01026 |       0.68293
Evaluating losses...
     -0.02465 |       0.00000 |      43.83368 |       0.01036 |       0.68283
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01066 |       0.00000 |      82.37625 |       0.00472 |       0.68839
     -0.01705 |       0.00000 |      68.95894 |       0.00829 |       0.68481
     -0.01867 |       0.00000 |      42.63371 |       0.00834 |       0.68475
     -0.02140 |       0.00000 |      21.49182 |       0.00909 |       0.68400
     -0.02489 |       0.00000 |      15.36208 |       0.01083 |       0.68228
     -0.02802 |       0.00000 |      13.90825 |       0.01227 |       0.68085
     -0.02975 |       0.00000 |      12.51083 |       0.01303 |       0.68009
     -0.03047 |       0.00000 |      11.10016 |       0.01360 |       0.67954
     -0.03041 |       0.00000 |      10.01041 |       0.01426 |       0.67889
     -0.03118 |       0.00000 |       9.32500 |       0.01396 |       0.67919
Evaluating losses...
     -0.03147 |       0.00000 |       9.07469 |       0.01483 |       0.67833
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00117 |       0.00000 |      87.40546 |      6.97e-05 |       0.69308
     -0.00526 |       0.00000 |      84.68251 |       0.00071 |       0.69245
     -0.01073 |       0.00000 |      81.19988 |       0.00309 |       0.69011
     -0.01451 |       0.00000 |      76.74495 |       0.00650 |       0.68672
     -0.01619 |       0.00000 |      71.45879 |       0.00864 |       0.68461
     -0.01692 |       0.00000 |      65.63575 |       0.00896 |       0.68428
     -0.01787 |       0.00000 |      59.54114 |       0.00891 |       0.68433
     -0.01944 |       0.00000 |      53.48527 |       0.00943 |       0.68380
     -0.02084 |       0.00000 |      47.66557 |       0.00976 |       0.68348
     -0.02241 |       0.00000 |      42.21445 |       0.01047 |       0.68278
Evaluating losses...
     -0.02315 |       0.00000 |      39.55385 |       0.01077 |       0.68248
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00965 |       0.00000 |      89.17068 |       0.00385 |       0.68928
     -0.01603 |       0.00000 |      75.39967 |       0.00795 |       0.68519
     -0.01742 |       0.00000 |      47.85702 |       0.00846 |       0.68469
     -0.01912 |       0.00000 |      24.63680 |       0.00887 |       0.68427
     -0.02253 |       0.00000 |      17.43161 |       0.00991 |       0.68323
     -0.02578 |       0.00000 |      15.91979 |       0.01167 |       0.68149
     -0.02795 |       0.00000 |      14.72400 |       0.01299 |       0.68018
     -0.02880 |       0.00000 |      13.37384 |       0.01340 |       0.67978
     -0.02966 |       0.00000 |      12.16254 |       0.01391 |       0.67928
     -0.03012 |       0.00000 |      11.24682 |       0.01421 |       0.67898
Evaluating losses...
     -0.03029 |       0.00000 |      10.88894 |       0.01554 |       0.67768
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00193 |       0.00000 |      89.42025 |       0.00012 |       0.69303
     -0.00810 |       0.00000 |      86.70559 |       0.00161 |       0.69155
     -0.01528 |       0.00000 |      83.56717 |       0.00566 |       0.68756
     -0.01761 |       0.00000 |      79.76917 |       0.00872 |       0.68454
     -0.01818 |       0.00000 |      75.23604 |       0.00945 |       0.68383
     -0.01894 |       0.00000 |      70.15320 |       0.00917 |       0.68410
     -0.01970 |       0.00000 |      64.73894 |       0.00943 |       0.68384
     -0.02079 |       0.00000 |      59.15982 |       0.00940 |       0.68386
     -0.02191 |       0.00000 |      53.54852 |       0.01042 |       0.68286
     -0.02330 |       0.00000 |      48.12657 |       0.01064 |       0.68265
Evaluating losses...
     -0.02404 |       0.00000 |      45.48777 |       0.01115 |       0.68215
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01012 |       0.00000 |      86.81961 |       0.00439 |       0.68892
     -0.01744 |       0.00000 |      74.08448 |       0.00804 |       0.68539
     -0.01926 |       0.00000 |      47.33062 |       0.00855 |       0.68489
     -0.02202 |       0.00000 |      24.01334 |       0.00930 |       0.68415
     -0.02563 |       0.00000 |      16.71763 |       0.01055 |       0.68292
     -0.02809 |       0.00000 |      15.23231 |       0.01229 |       0.68122
     -0.02967 |       0.00000 |      14.15143 |       0.01322 |       0.68032
     -0.03057 |       0.00000 |      12.94469 |       0.01375 |       0.67980
     -0.03100 |       0.00000 |      11.85802 |       0.01419 |       0.67936
     -0.03098 |       0.00000 |      11.04455 |       0.01496 |       0.67862
Evaluating losses...
     -0.03185 |       0.00000 |      10.74666 |       0.01413 |       0.67942
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00093 |       0.00000 |      90.73298 |      5.92e-05 |       0.69309
     -0.00422 |       0.00000 |      87.81602 |       0.00069 |       0.69245
     -0.00860 |       0.00000 |      84.48897 |       0.00310 |       0.69003
     -0.01113 |       0.00000 |      80.43800 |       0.00608 |       0.68709
     -0.01228 |       0.00000 |      75.75423 |       0.00764 |       0.68555
     -0.01321 |       0.00000 |      70.43546 |       0.00811 |       0.68509
     -0.01370 |       0.00000 |      64.73523 |       0.00831 |       0.68489
     -0.01443 |       0.00000 |      58.81767 |       0.00884 |       0.68437
     -0.01513 |       0.00000 |      52.91713 |       0.00866 |       0.68454
     -0.01597 |       0.00000 |      47.25595 |       0.00933 |       0.68388
Evaluating losses...
     -0.01656 |       0.00000 |      44.50844 |       0.00857 |       0.68462
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00944 |       0.00000 |      83.07502 |       0.00366 |       0.68969
     -0.01498 |       0.00000 |      70.63530 |       0.00808 |       0.68546
     -0.01671 |       0.00000 |      44.53046 |       0.00853 |       0.68501
     -0.01953 |       0.00000 |      22.15381 |       0.00887 |       0.68467
     -0.02352 |       0.00000 |      15.42286 |       0.01066 |       0.68292
     -0.02625 |       0.00000 |      14.03837 |       0.01233 |       0.68129
     -0.02725 |       0.00000 |      12.81180 |       0.01308 |       0.68056
     -0.02767 |       0.00000 |      11.50814 |       0.01357 |       0.68008
     -0.02871 |       0.00000 |      10.36095 |       0.01361 |       0.68003
     -0.02849 |       0.00000 |       9.56190 |       0.01411 |       0.67955
Evaluating losses...
     -0.02934 |       0.00000 |       9.24657 |       0.01422 |       0.67943
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00114 |       0.00000 |      93.96014 |      8.01e-05 |       0.69307
     -0.00490 |       0.00000 |      90.62772 |       0.00086 |       0.69230
     -0.01042 |       0.00000 |      87.04362 |       0.00381 |       0.68939
     -0.01372 |       0.00000 |      83.05396 |       0.00697 |       0.68629
     -0.01465 |       0.00000 |      78.51494 |       0.00865 |       0.68465
     -0.01530 |       0.00000 |      73.46721 |       0.00834 |       0.68495
     -0.01612 |       0.00000 |      68.05845 |       0.00899 |       0.68431
     -0.01687 |       0.00000 |      62.47168 |       0.00904 |       0.68426
     -0.01766 |       0.00000 |      56.93413 |       0.00998 |       0.68334
     -0.01873 |       0.00000 |      51.52541 |       0.00967 |       0.68364
Evaluating losses...
     -0.01905 |       0.00000 |      48.81628 |       0.01003 |       0.68328
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00923 |       0.00000 |      87.74744 |       0.00434 |       0.68885
     -0.01636 |       0.00000 |      75.08533 |       0.00774 |       0.68548
     -0.01839 |       0.00000 |      48.82258 |       0.00856 |       0.68466
     -0.02121 |       0.00000 |      24.96018 |       0.00900 |       0.68422
     -0.02367 |       0.00000 |      16.85196 |       0.01045 |       0.68279
     -0.02554 |       0.00000 |      15.10620 |       0.01156 |       0.68170
     -0.02677 |       0.00000 |      13.84401 |       0.01202 |       0.68125
     -0.02739 |       0.00000 |      12.42750 |       0.01259 |       0.68069
     -0.02816 |       0.00000 |      11.19038 |       0.01325 |       0.68004
     -0.02872 |       0.00000 |      10.30946 |       0.01322 |       0.68007
Evaluating losses...
     -0.02905 |       0.00000 |       9.97772 |       0.01419 |       0.67912
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00139 |       0.00000 |      90.71346 |      7.80e-05 |       0.69307
     -0.00595 |       0.00000 |      88.31053 |       0.00119 |       0.69196
     -0.01175 |       0.00000 |      85.38226 |       0.00468 |       0.68852
     -0.01491 |       0.00000 |      81.70265 |       0.00681 |       0.68640
     -0.01597 |       0.00000 |      77.17358 |       0.00820 |       0.68503
     -0.01677 |       0.00000 |      72.07275 |       0.00844 |       0.68480
     -0.01788 |       0.00000 |      66.68974 |       0.00875 |       0.68449
     -0.01909 |       0.00000 |      61.15518 |       0.00902 |       0.68422
     -0.02040 |       0.00000 |      55.59238 |       0.00968 |       0.68357
     -0.02142 |       0.00000 |      50.14378 |       0.00986 |       0.68338
Evaluating losses...
     -0.02272 |       0.00000 |      47.43156 |       0.01011 |       0.68313
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01199 |       0.00000 |      85.83646 |       0.00461 |       0.68857
     -0.01777 |       0.00000 |      72.33752 |       0.00847 |       0.68474
     -0.01913 |       0.00000 |      45.74072 |       0.00882 |       0.68439
     -0.02069 |       0.00000 |      23.43275 |       0.00887 |       0.68434
     -0.02376 |       0.00000 |      16.51740 |       0.00935 |       0.68385
     -0.02668 |       0.00000 |      15.00756 |       0.01117 |       0.68206
     -0.02847 |       0.00000 |      13.81159 |       0.01252 |       0.68073
     -0.02926 |       0.00000 |      12.54016 |       0.01337 |       0.67990
     -0.03023 |       0.00000 |      11.45200 |       0.01363 |       0.67964
     -0.03068 |       0.00000 |      10.70434 |       0.01393 |       0.67934
Evaluating losses...
     -0.03108 |       0.00000 |      10.39827 |       0.01415 |       0.67912
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00171 |       0.00000 |      96.08575 |       0.00013 |       0.69302
     -0.00733 |       0.00000 |      93.44949 |       0.00150 |       0.69165
     -0.01413 |       0.00000 |      90.32037 |       0.00548 |       0.68773
     -0.01696 |       0.00000 |      86.40935 |       0.00783 |       0.68542
     -0.01770 |       0.00000 |      81.74799 |       0.00855 |       0.68471
     -0.01847 |       0.00000 |      76.52289 |       0.00915 |       0.68413
     -0.01918 |       0.00000 |      70.88748 |       0.00920 |       0.68407
     -0.02019 |       0.00000 |      64.95251 |       0.00911 |       0.68416
     -0.02159 |       0.00000 |      58.95415 |       0.00952 |       0.68375
     -0.02244 |       0.00000 |      53.04932 |       0.01016 |       0.68312
Evaluating losses...
     -0.02320 |       0.00000 |      50.10308 |       0.01138 |       0.68193
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00846 |       0.00000 |      87.44940 |       0.00372 |       0.68950
     -0.01433 |       0.00000 |      74.79963 |       0.00780 |       0.68550
     -0.01638 |       0.00000 |      48.37770 |       0.00789 |       0.68541
     -0.01875 |       0.00000 |      24.54701 |       0.00904 |       0.68428
     -0.02120 |       0.00000 |      16.74004 |       0.01019 |       0.68315
     -0.02294 |       0.00000 |      15.08701 |       0.01119 |       0.68216
     -0.02441 |       0.00000 |      13.86174 |       0.01236 |       0.68102
     -0.02570 |       0.00000 |      12.62034 |       0.01309 |       0.68031
     -0.02664 |       0.00000 |      11.63256 |       0.01352 |       0.67988
     -0.02700 |       0.00000 |      10.96659 |       0.01344 |       0.67996
Evaluating losses...
     -0.02798 |       0.00000 |      10.71350 |       0.01454 |       0.67888
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00144 |       0.00000 |      84.88058 |       0.00013 |       0.69302
     -0.00618 |       0.00000 |      82.08293 |       0.00093 |       0.69223
     -0.01336 |       0.00000 |      78.72737 |       0.00440 |       0.68880
     -0.01752 |       0.00000 |      74.63428 |       0.00759 |       0.68566
     -0.01831 |       0.00000 |      69.87351 |       0.00945 |       0.68386
     -0.01909 |       0.00000 |      64.58508 |       0.00912 |       0.68416
     -0.01997 |       0.00000 |      58.98639 |       0.00923 |       0.68405
     -0.02134 |       0.00000 |      53.34890 |       0.00953 |       0.68375
     -0.02231 |       0.00000 |      47.86625 |       0.00981 |       0.68347
     -0.02353 |       0.00000 |      42.70375 |       0.01028 |       0.68300
Evaluating losses...
     -0.02427 |       0.00000 |      40.24433 |       0.01105 |       0.68225
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00845 |       0.00000 |      89.44132 |       0.00379 |       0.68931
     -0.01410 |       0.00000 |      76.27887 |       0.00756 |       0.68553
     -0.01573 |       0.00000 |      49.09818 |       0.00806 |       0.68503
     -0.01791 |       0.00000 |      25.08253 |       0.00857 |       0.68452
     -0.02089 |       0.00000 |      17.31253 |       0.00952 |       0.68357
     -0.02326 |       0.00000 |      15.65569 |       0.01110 |       0.68201
     -0.02515 |       0.00000 |      14.47383 |       0.01222 |       0.68090
     -0.02634 |       0.00000 |      13.20968 |       0.01275 |       0.68038
     -0.02720 |       0.00000 |      12.09214 |       0.01321 |       0.67993
     -0.02761 |       0.00000 |      11.28876 |       0.01365 |       0.67950
Evaluating losses...
     -0.02818 |       0.00000 |      10.97565 |       0.01476 |       0.67841
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00283 |       0.00000 |      90.63201 |       0.00018 |       0.69297
     -0.01101 |       0.00000 |      88.26908 |       0.00211 |       0.69105
     -0.01840 |       0.00000 |      85.46320 |       0.00663 |       0.68659
     -0.02038 |       0.00000 |      81.95404 |       0.00868 |       0.68459
     -0.02124 |       0.00000 |      77.72620 |       0.00941 |       0.68387
     -0.02199 |       0.00000 |      72.75191 |       0.00942 |       0.68385
     -0.02330 |       0.00000 |      67.20920 |       0.00979 |       0.68348
     -0.02471 |       0.00000 |      61.32216 |       0.00937 |       0.68388
     -0.02672 |       0.00000 |      55.36663 |       0.01036 |       0.68290
     -0.02838 |       0.00000 |      49.51649 |       0.01088 |       0.68239
Evaluating losses...
     -0.02941 |       0.00000 |      46.61491 |       0.01093 |       0.68234
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01204 |       0.00000 |      86.68154 |       0.00445 |       0.68884
     -0.01930 |       0.00000 |      74.16803 |       0.00848 |       0.68491
     -0.02156 |       0.00000 |      47.50610 |       0.00891 |       0.68448
     -0.02450 |       0.00000 |      23.94238 |       0.00931 |       0.68408
     -0.02716 |       0.00000 |      16.53604 |       0.01086 |       0.68255
     -0.02906 |       0.00000 |      14.90595 |       0.01188 |       0.68156
     -0.02992 |       0.00000 |      13.57711 |       0.01225 |       0.68119
     -0.03075 |       0.00000 |      12.25790 |       0.01329 |       0.68018
     -0.03115 |       0.00000 |      11.23499 |       0.01359 |       0.67988
     -0.03180 |       0.00000 |      10.56472 |       0.01392 |       0.67956
Evaluating losses...
     -0.03192 |       0.00000 |      10.31068 |       0.01231 |       0.68112
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00177 |       0.00000 |      88.97130 |       0.00014 |       0.69302
     -0.00720 |       0.00000 |      85.72075 |       0.00144 |       0.69177
     -0.01272 |       0.00000 |      81.89474 |       0.00492 |       0.68838
     -0.01536 |       0.00000 |      77.36967 |       0.00725 |       0.68610
     -0.01649 |       0.00000 |      72.03234 |       0.00834 |       0.68503
     -0.01732 |       0.00000 |      66.16801 |       0.00874 |       0.68464
     -0.01873 |       0.00000 |      60.06392 |       0.00874 |       0.68464
     -0.01956 |       0.00000 |      53.94833 |       0.00948 |       0.68391
     -0.02054 |       0.00000 |      48.00404 |       0.00960 |       0.68379
     -0.02163 |       0.00000 |      42.43956 |       0.00991 |       0.68348
Evaluating losses...
     -0.02199 |       0.00000 |      39.76692 |       0.01031 |       0.68309
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01153 |       0.00000 |      85.34739 |       0.00442 |       0.68871
     -0.01716 |       0.00000 |      72.56855 |       0.00852 |       0.68462
     -0.01928 |       0.00000 |      46.24228 |       0.00899 |       0.68415
     -0.02220 |       0.00000 |      23.34633 |       0.00923 |       0.68390
     -0.02542 |       0.00000 |      16.08258 |       0.01068 |       0.68246
     -0.02778 |       0.00000 |      14.52716 |       0.01166 |       0.68149
     -0.02940 |       0.00000 |      13.37256 |       0.01302 |       0.68015
     -0.03053 |       0.00000 |      12.16312 |       0.01322 |       0.67995
     -0.03091 |       0.00000 |      11.12592 |       0.01393 |       0.67926
     -0.03153 |       0.00000 |      10.38480 |       0.01388 |       0.67931
Evaluating losses...
     -0.03193 |       0.00000 |      10.08811 |       0.01419 |       0.67900
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00157 |       0.00000 |      94.52390 |      8.19e-05 |       0.69306
     -0.00657 |       0.00000 |      91.54572 |       0.00090 |       0.69224
     -0.01432 |       0.00000 |      87.83791 |       0.00384 |       0.68933
     -0.01848 |       0.00000 |      83.26447 |       0.00803 |       0.68520
     -0.01917 |       0.00000 |      77.81002 |       0.00914 |       0.68411
     -0.01999 |       0.00000 |      71.70299 |       0.00919 |       0.68406
     -0.02043 |       0.00000 |      65.35316 |       0.00957 |       0.68369
     -0.02176 |       0.00000 |      58.94538 |       0.00912 |       0.68412
     -0.02270 |       0.00000 |      52.74371 |       0.00971 |       0.68353
     -0.02448 |       0.00000 |      46.86662 |       0.01007 |       0.68318
Evaluating losses...
     -0.02513 |       0.00000 |      44.00035 |       0.00942 |       0.68381
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01044 |       0.00000 |      82.20329 |       0.00467 |       0.68848
     -0.01704 |       0.00000 |      69.79575 |       0.00796 |       0.68520
     -0.01879 |       0.00000 |      44.60650 |       0.00838 |       0.68479
     -0.02143 |       0.00000 |      22.42896 |       0.00904 |       0.68413
     -0.02449 |       0.00000 |      15.37413 |       0.01029 |       0.68288
     -0.02654 |       0.00000 |      13.88037 |       0.01157 |       0.68162
     -0.02792 |       0.00000 |      12.76079 |       0.01289 |       0.68032
     -0.02883 |       0.00000 |      11.59454 |       0.01304 |       0.68018
     -0.02927 |       0.00000 |      10.63551 |       0.01372 |       0.67951
     -0.02941 |       0.00000 |       9.95879 |       0.01416 |       0.67908
Evaluating losses...
     -0.02940 |       0.00000 |       9.71030 |       0.01193 |       0.68125
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00134 |       0.00000 |      88.57794 |       0.00011 |       0.69303
     -0.00550 |       0.00000 |      85.55035 |       0.00115 |       0.69200
     -0.01183 |       0.00000 |      82.14881 |       0.00427 |       0.68890
     -0.01491 |       0.00000 |      78.09451 |       0.00755 |       0.68568
     -0.01561 |       0.00000 |      73.16946 |       0.00855 |       0.68471
     -0.01617 |       0.00000 |      67.64874 |       0.00859 |       0.68466
     -0.01683 |       0.00000 |      61.76417 |       0.00884 |       0.68441
     -0.01816 |       0.00000 |      55.84403 |       0.00903 |       0.68422
     -0.01910 |       0.00000 |      50.06881 |       0.00933 |       0.68392
     -0.01989 |       0.00000 |      44.61015 |       0.01033 |       0.68294
Evaluating losses...
     -0.02038 |       0.00000 |      41.98943 |       0.01069 |       0.68258
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00947 |       0.00000 |      86.16107 |       0.00391 |       0.68937
     -0.01574 |       0.00000 |      74.05270 |       0.00775 |       0.68563
     -0.01784 |       0.00000 |      48.07320 |       0.00812 |       0.68527
     -0.02086 |       0.00000 |      24.47494 |       0.00960 |       0.68381
     -0.02448 |       0.00000 |      16.92872 |       0.01095 |       0.68248
     -0.02664 |       0.00000 |      15.28244 |       0.01242 |       0.68104
     -0.02775 |       0.00000 |      13.91236 |       0.01308 |       0.68039
     -0.02831 |       0.00000 |      12.53746 |       0.01366 |       0.67982
     -0.02832 |       0.00000 |      11.42115 |       0.01358 |       0.67990
     -0.02878 |       0.00000 |      10.68404 |       0.01392 |       0.67956
Evaluating losses...
     -0.02897 |       0.00000 |      10.42091 |       0.01606 |       0.67749
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00164 |       0.00000 |      81.45968 |      9.26e-05 |       0.69305
     -0.00743 |       0.00000 |      78.85304 |       0.00107 |       0.69209
     -0.01575 |       0.00000 |      75.90122 |       0.00483 |       0.68836
     -0.01858 |       0.00000 |      72.34354 |       0.00847 |       0.68479
     -0.01959 |       0.00000 |      68.16105 |       0.00872 |       0.68454
     -0.02028 |       0.00000 |      63.57330 |       0.00930 |       0.68397
     -0.02121 |       0.00000 |      58.64985 |       0.00908 |       0.68417
     -0.02239 |       0.00000 |      53.51159 |       0.00993 |       0.68333
     -0.02412 |       0.00000 |      48.35698 |       0.00995 |       0.68330
     -0.02553 |       0.00000 |      43.32841 |       0.01030 |       0.68295
Evaluating losses...
     -0.02618 |       0.00000 |      40.93987 |       0.01140 |       0.68188
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00838 |       0.00000 |      83.45905 |       0.00398 |       0.68906
     -0.01430 |       0.00000 |      70.77675 |       0.00799 |       0.68500
     -0.01639 |       0.00000 |      44.82957 |       0.00809 |       0.68489
     -0.01943 |       0.00000 |      22.48532 |       0.00918 |       0.68380
     -0.02222 |       0.00000 |      15.62220 |       0.01077 |       0.68222
     -0.02434 |       0.00000 |      14.26787 |       0.01188 |       0.68112
     -0.02520 |       0.00000 |      13.21035 |       0.01272 |       0.68029
     -0.02619 |       0.00000 |      11.99948 |       0.01313 |       0.67988
     -0.02672 |       0.00000 |      10.89864 |       0.01338 |       0.67964
     -0.02661 |       0.00000 |      10.09465 |       0.01380 |       0.67924
Evaluating losses...
     -0.02731 |       0.00000 |       9.77902 |       0.01221 |       0.68080
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00103 |       0.00000 |      83.48862 |      5.59e-05 |       0.69309
     -0.00419 |       0.00000 |      80.41310 |       0.00055 |       0.69260
     -0.00937 |       0.00000 |      76.73897 |       0.00272 |       0.69046
     -0.01336 |       0.00000 |      72.27627 |       0.00700 |       0.68625
     -0.01461 |       0.00000 |      67.03738 |       0.00859 |       0.68470
     -0.01559 |       0.00000 |      61.36800 |       0.00836 |       0.68492
     -0.01621 |       0.00000 |      55.48693 |       0.00911 |       0.68418
     -0.01753 |       0.00000 |      49.63078 |       0.00913 |       0.68415
     -0.01875 |       0.00000 |      43.95876 |       0.00965 |       0.68363
     -0.01952 |       0.00000 |      38.65615 |       0.01023 |       0.68307
Evaluating losses...
     -0.02017 |       0.00000 |      36.15307 |       0.01076 |       0.68255
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01052 |       0.00000 |      94.60532 |       0.00404 |       0.68927
     -0.01823 |       0.00000 |      81.12537 |       0.00842 |       0.68502
     -0.02022 |       0.00000 |      52.48405 |       0.00827 |       0.68515
     -0.02300 |       0.00000 |      26.57899 |       0.00914 |       0.68430
     -0.02635 |       0.00000 |      17.78651 |       0.01026 |       0.68319
     -0.02884 |       0.00000 |      15.94437 |       0.01181 |       0.68168
     -0.02966 |       0.00000 |      14.65883 |       0.01278 |       0.68073
     -0.03101 |       0.00000 |      13.25879 |       0.01337 |       0.68015
     -0.03128 |       0.00000 |      12.04339 |       0.01383 |       0.67970
     -0.03195 |       0.00000 |      11.20896 |       0.01362 |       0.67991
Evaluating losses...
     -0.03210 |       0.00000 |      10.88148 |       0.01498 |       0.67858
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00072 |       0.00000 |      79.65036 |      8.12e-05 |       0.69307
     -0.00392 |       0.00000 |      76.60268 |       0.00056 |       0.69261
     -0.00940 |       0.00000 |      72.76885 |       0.00284 |       0.69037
     -0.01404 |       0.00000 |      68.18700 |       0.00658 |       0.68669
     -0.01631 |       0.00000 |      62.96183 |       0.00847 |       0.68483
     -0.01698 |       0.00000 |      57.26337 |       0.00908 |       0.68423
     -0.01795 |       0.00000 |      51.43753 |       0.00914 |       0.68416
     -0.01851 |       0.00000 |      45.74440 |       0.00917 |       0.68413
     -0.01995 |       0.00000 |      40.32658 |       0.00984 |       0.68347
     -0.02096 |       0.00000 |      35.35489 |       0.00993 |       0.68338
Evaluating losses...
     -0.02161 |       0.00000 |      32.98711 |       0.00984 |       0.68346
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00772 |       0.00000 |      93.05097 |       0.00277 |       0.69032
     -0.01533 |       0.00000 |      79.69557 |       0.00814 |       0.68492
     -0.01720 |       0.00000 |      51.82536 |       0.00797 |       0.68507
     -0.01925 |       0.00000 |      26.32752 |       0.00869 |       0.68435
     -0.02219 |       0.00000 |      17.70644 |       0.00917 |       0.68386
     -0.02477 |       0.00000 |      16.04501 |       0.01081 |       0.68224
     -0.02729 |       0.00000 |      15.05207 |       0.01223 |       0.68083
     -0.02875 |       0.00000 |      13.92579 |       0.01279 |       0.68028
     -0.02917 |       0.00000 |      12.83492 |       0.01334 |       0.67974
     -0.03016 |       0.00000 |      11.93043 |       0.01412 |       0.67897
Evaluating losses...
     -0.03060 |       0.00000 |      11.55408 |       0.01289 |       0.68019
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00213 |       0.00000 |      87.26585 |       0.00013 |       0.69301
     -0.00889 |       0.00000 |      84.27690 |       0.00169 |       0.69145
     -0.01634 |       0.00000 |      80.82898 |       0.00515 |       0.68802
     -0.01965 |       0.00000 |      76.67915 |       0.00852 |       0.68469
     -0.02053 |       0.00000 |      71.90184 |       0.00903 |       0.68418
     -0.02089 |       0.00000 |      66.64265 |       0.00923 |       0.68398
     -0.02149 |       0.00000 |      61.15163 |       0.00973 |       0.68349
     -0.02284 |       0.00000 |      55.54344 |       0.00994 |       0.68328
     -0.02385 |       0.00000 |      49.88602 |       0.00950 |       0.68371
     -0.02501 |       0.00000 |      44.50037 |       0.00987 |       0.68333
Evaluating losses...
     -0.02543 |       0.00000 |      41.85564 |       0.01086 |       0.68236
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00902 |       0.00000 |      85.64711 |       0.00333 |       0.68989
     -0.01531 |       0.00000 |      72.43018 |       0.00829 |       0.68502
     -0.01709 |       0.00000 |      45.43483 |       0.00807 |       0.68522
     -0.01930 |       0.00000 |      22.98137 |       0.00907 |       0.68424
     -0.02218 |       0.00000 |      16.21676 |       0.01021 |       0.68311
     -0.02436 |       0.00000 |      14.75108 |       0.01113 |       0.68221
     -0.02610 |       0.00000 |      13.54681 |       0.01223 |       0.68112
     -0.02728 |       0.00000 |      12.25825 |       0.01303 |       0.68034
     -0.02784 |       0.00000 |      11.14573 |       0.01334 |       0.68003
     -0.02849 |       0.00000 |      10.37018 |       0.01340 |       0.67997
Evaluating losses...
     -0.02890 |       0.00000 |      10.07113 |       0.01385 |       0.67953
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00104 |       0.00000 |      93.85953 |      8.55e-05 |       0.69307
     -0.00429 |       0.00000 |      91.02333 |       0.00082 |       0.69236
     -0.00936 |       0.00000 |      87.73798 |       0.00321 |       0.69002
     -0.01335 |       0.00000 |      83.75428 |       0.00605 |       0.68722
     -0.01540 |       0.00000 |      78.92709 |       0.00745 |       0.68583
     -0.01647 |       0.00000 |      73.37106 |       0.00890 |       0.68440
     -0.01788 |       0.00000 |      67.38108 |       0.00857 |       0.68472
     -0.01925 |       0.00000 |      61.20576 |       0.00882 |       0.68447
     -0.02093 |       0.00000 |      55.11339 |       0.00968 |       0.68363
     -0.02263 |       0.00000 |      49.18424 |       0.01018 |       0.68313
Evaluating losses...
     -0.02321 |       0.00000 |      46.29472 |       0.01099 |       0.68234
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01232 |       0.00000 |      88.16605 |       0.00461 |       0.68847
     -0.01968 |       0.00000 |      75.43063 |       0.00876 |       0.68431
     -0.02159 |       0.00000 |      48.77044 |       0.00879 |       0.68427
     -0.02439 |       0.00000 |      24.66855 |       0.00973 |       0.68333
     -0.02768 |       0.00000 |      16.76392 |       0.01111 |       0.68195
     -0.03008 |       0.00000 |      14.94151 |       0.01236 |       0.68071
     -0.03139 |       0.00000 |      13.47724 |       0.01350 |       0.67959
     -0.03242 |       0.00000 |      12.06229 |       0.01381 |       0.67929
     -0.03312 |       0.00000 |      10.93304 |       0.01416 |       0.67894
     -0.03355 |       0.00000 |      10.18334 |       0.01451 |       0.67860
Evaluating losses...
     -0.03395 |       0.00000 |       9.89781 |       0.01532 |       0.67780
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00125 |       0.00000 |      86.33978 |       0.00011 |       0.69303
     -0.00524 |       0.00000 |      83.55128 |       0.00115 |       0.69196
     -0.00934 |       0.00000 |      80.32970 |       0.00374 |       0.68938
     -0.01134 |       0.00000 |      76.49206 |       0.00544 |       0.68772
     -0.01315 |       0.00000 |      71.92950 |       0.00737 |       0.68585
     -0.01378 |       0.00000 |      66.72375 |       0.00768 |       0.68554
     -0.01456 |       0.00000 |      61.10363 |       0.00815 |       0.68507
     -0.01536 |       0.00000 |      55.25158 |       0.00783 |       0.68539
     -0.01633 |       0.00000 |      49.57913 |       0.00895 |       0.68428
     -0.01715 |       0.00000 |      44.16364 |       0.00894 |       0.68428
Evaluating losses...
     -0.01780 |       0.00000 |      41.54359 |       0.00873 |       0.68449
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00750 |       0.00000 |      83.15320 |       0.00305 |       0.68998
     -0.01392 |       0.00000 |      69.89423 |       0.00814 |       0.68482
     -0.01616 |       0.00000 |      43.38651 |       0.00825 |       0.68469
     -0.01976 |       0.00000 |      21.83859 |       0.00935 |       0.68358
     -0.02297 |       0.00000 |      15.62313 |       0.01110 |       0.68185
     -0.02574 |       0.00000 |      14.16174 |       0.01240 |       0.68055
     -0.02728 |       0.00000 |      12.91957 |       0.01290 |       0.68006
     -0.02796 |       0.00000 |      11.61764 |       0.01350 |       0.67948
     -0.02857 |       0.00000 |      10.46585 |       0.01338 |       0.67960
     -0.02867 |       0.00000 |       9.63701 |       0.01402 |       0.67897
Evaluating losses...
     -0.02880 |       0.00000 |       9.31152 |       0.01564 |       0.67737
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00140 |       0.00000 |      84.32919 |       0.00013 |       0.69302
     -0.00544 |       0.00000 |      81.70689 |       0.00102 |       0.69215
     -0.01130 |       0.00000 |      78.45866 |       0.00439 |       0.68882
     -0.01491 |       0.00000 |      74.36389 |       0.00728 |       0.68598
     -0.01554 |       0.00000 |      69.48770 |       0.00824 |       0.68505
     -0.01663 |       0.00000 |      64.10631 |       0.00903 |       0.68427
     -0.01798 |       0.00000 |      58.49514 |       0.00837 |       0.68491
     -0.01968 |       0.00000 |      52.89897 |       0.00941 |       0.68388
     -0.02129 |       0.00000 |      47.38292 |       0.01024 |       0.68307
     -0.02266 |       0.00000 |      42.10096 |       0.01078 |       0.68254
Evaluating losses...
     -0.02289 |       0.00000 |      39.55902 |       0.01239 |       0.68097
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00848 |       0.00000 |      87.37868 |       0.00387 |       0.68933
     -0.01457 |       0.00000 |      74.53916 |       0.00814 |       0.68512
     -0.01634 |       0.00000 |      48.03864 |       0.00808 |       0.68517
     -0.01837 |       0.00000 |      24.44617 |       0.00906 |       0.68420
     -0.02044 |       0.00000 |      17.04923 |       0.00975 |       0.68352
     -0.02234 |       0.00000 |      15.63136 |       0.01061 |       0.68266
     -0.02362 |       0.00000 |      14.57283 |       0.01203 |       0.68127
     -0.02515 |       0.00000 |      13.40151 |       0.01266 |       0.68066
     -0.02545 |       0.00000 |      12.35264 |       0.01277 |       0.68054
     -0.02607 |       0.00000 |      11.55378 |       0.01326 |       0.68007
Evaluating losses...
     -0.02681 |       0.00000 |      11.21743 |       0.01447 |       0.67888
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00191 |       0.00000 |      85.57808 |       0.00012 |       0.69302
     -0.00779 |       0.00000 |      82.83141 |       0.00130 |       0.69183
     -0.01424 |       0.00000 |      79.84222 |       0.00492 |       0.68823
     -0.01779 |       0.00000 |      76.42071 |       0.00784 |       0.68534
     -0.01901 |       0.00000 |      72.52492 |       0.00870 |       0.68450
     -0.01982 |       0.00000 |      68.14018 |       0.00907 |       0.68412
     -0.02117 |       0.00000 |      63.38713 |       0.00913 |       0.68405
     -0.02237 |       0.00000 |      58.46276 |       0.00962 |       0.68357
     -0.02379 |       0.00000 |      53.48033 |       0.01000 |       0.68320
     -0.02514 |       0.00000 |      48.47550 |       0.01054 |       0.68266
Evaluating losses...
     -0.02595 |       0.00000 |      45.98987 |       0.01097 |       0.68223
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01461 |       0.00000 |      92.89326 |       0.00540 |       0.68780
     -0.02099 |       0.00000 |      79.70062 |       0.00828 |       0.68494
     -0.02289 |       0.00000 |      51.76788 |       0.00914 |       0.68410
     -0.02553 |       0.00000 |      26.41743 |       0.00921 |       0.68402
     -0.02894 |       0.00000 |      17.98724 |       0.01028 |       0.68296
     -0.03071 |       0.00000 |      16.30835 |       0.01214 |       0.68113
     -0.03161 |       0.00000 |      15.29452 |       0.01270 |       0.68058
     -0.03226 |       0.00000 |      14.16270 |       0.01314 |       0.68015
     -0.03267 |       0.00000 |      13.08899 |       0.01342 |       0.67987
     -0.03297 |       0.00000 |      12.25114 |       0.01371 |       0.67958
Evaluating losses...
     -0.03334 |       0.00000 |      11.90258 |       0.01264 |       0.68063
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00148 |       0.00000 |      99.98683 |      5.27e-05 |       0.69309
     -0.00717 |       0.00000 |      97.46516 |       0.00106 |       0.69208
     -0.01551 |       0.00000 |      94.32737 |       0.00491 |       0.68826
     -0.01889 |       0.00000 |      90.27781 |       0.00809 |       0.68511
     -0.01977 |       0.00000 |      85.11182 |       0.00897 |       0.68425
     -0.02114 |       0.00000 |      79.02767 |       0.00895 |       0.68426
     -0.02241 |       0.00000 |      72.33837 |       0.00936 |       0.68386
     -0.02394 |       0.00000 |      65.39651 |       0.00918 |       0.68402
     -0.02582 |       0.00000 |      58.49413 |       0.01013 |       0.68308
     -0.02774 |       0.00000 |      51.94552 |       0.01045 |       0.68276
Evaluating losses...
     -0.02864 |       0.00000 |      48.75986 |       0.01080 |       0.68242
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00923 |       0.00000 |      87.95825 |       0.00415 |       0.68903
     -0.01638 |       0.00000 |      74.31675 |       0.00787 |       0.68534
     -0.01892 |       0.00000 |      47.03374 |       0.00867 |       0.68455
     -0.02143 |       0.00000 |      23.94627 |       0.00965 |       0.68357
     -0.02363 |       0.00000 |      16.81669 |       0.01085 |       0.68239
     -0.02571 |       0.00000 |      15.29297 |       0.01192 |       0.68134
     -0.02707 |       0.00000 |      14.06920 |       0.01259 |       0.68068
     -0.02752 |       0.00000 |      12.75470 |       0.01274 |       0.68053
     -0.02834 |       0.00000 |      11.67301 |       0.01307 |       0.68021
     -0.02861 |       0.00000 |      10.93058 |       0.01352 |       0.67976
Evaluating losses...
     -0.02870 |       0.00000 |      10.64412 |       0.01526 |       0.67807
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00124 |       0.00000 |      89.37584 |      8.56e-05 |       0.69307
     -0.00499 |       0.00000 |      86.67918 |       0.00073 |       0.69244
     -0.01077 |       0.00000 |      83.31294 |       0.00362 |       0.68958
     -0.01464 |       0.00000 |      79.08987 |       0.00708 |       0.68619
     -0.01565 |       0.00000 |      74.12080 |       0.00860 |       0.68469
     -0.01625 |       0.00000 |      68.65055 |       0.00856 |       0.68473
     -0.01687 |       0.00000 |      62.93660 |       0.00901 |       0.68430
     -0.01783 |       0.00000 |      57.16650 |       0.00929 |       0.68401
     -0.01931 |       0.00000 |      51.47969 |       0.00909 |       0.68419
     -0.02047 |       0.00000 |      46.04607 |       0.01006 |       0.68324
Evaluating losses...
     -0.02152 |       0.00000 |      43.37421 |       0.01004 |       0.68324
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00727 |       0.00000 |      83.09847 |       0.00256 |       0.69066
     -0.01458 |       0.00000 |      70.47128 |       0.00777 |       0.68559
     -0.01574 |       0.00000 |      44.33726 |       0.00810 |       0.68526
     -0.01776 |       0.00000 |      22.12979 |       0.00871 |       0.68466
     -0.02058 |       0.00000 |      15.51291 |       0.00981 |       0.68357
     -0.02390 |       0.00000 |      14.04795 |       0.01135 |       0.68206
     -0.02618 |       0.00000 |      12.81656 |       0.01280 |       0.68064
     -0.02745 |       0.00000 |      11.50694 |       0.01343 |       0.68002
     -0.02777 |       0.00000 |      10.39296 |       0.01360 |       0.67985
     -0.02848 |       0.00000 |       9.62483 |       0.01383 |       0.67962
Evaluating losses...
     -0.02861 |       0.00000 |       9.34523 |       0.01566 |       0.67785
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00111 |       0.00000 |      91.54880 |      5.93e-05 |       0.69309
     -0.00493 |       0.00000 |      88.33777 |       0.00075 |       0.69239
     -0.01147 |       0.00000 |      84.73799 |       0.00316 |       0.68999
     -0.01613 |       0.00000 |      80.51398 |       0.00711 |       0.68609
     -0.01716 |       0.00000 |      75.58350 |       0.00902 |       0.68421
     -0.01805 |       0.00000 |      70.17767 |       0.00862 |       0.68459
     -0.01958 |       0.00000 |      64.48455 |       0.00902 |       0.68419
     -0.02122 |       0.00000 |      58.68222 |       0.00934 |       0.68387
     -0.02318 |       0.00000 |      52.96735 |       0.00991 |       0.68330
     -0.02485 |       0.00000 |      47.47322 |       0.01061 |       0.68262
Evaluating losses...
     -0.02520 |       0.00000 |      44.81409 |       0.01112 |       0.68212
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01128 |       0.00000 |      92.05076 |       0.00392 |       0.68925
     -0.01820 |       0.00000 |      79.13531 |       0.00902 |       0.68421
     -0.02030 |       0.00000 |      51.51532 |       0.00830 |       0.68490
     -0.02205 |       0.00000 |      26.18688 |       0.00951 |       0.68371
     -0.02558 |       0.00000 |      17.77572 |       0.00966 |       0.68354
     -0.02852 |       0.00000 |      16.06272 |       0.01140 |       0.68183
     -0.03043 |       0.00000 |      14.84064 |       0.01252 |       0.68073
     -0.03162 |       0.00000 |      13.52015 |       0.01333 |       0.67993
     -0.03255 |       0.00000 |      12.33052 |       0.01351 |       0.67975
     -0.03284 |       0.00000 |      11.44926 |       0.01380 |       0.67947
Evaluating losses...
     -0.03334 |       0.00000 |      11.10522 |       0.01447 |       0.67881
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00167 |       0.00000 |      82.67966 |      9.93e-05 |       0.69306
     -0.00695 |       0.00000 |      80.18455 |       0.00108 |       0.69210
     -0.01437 |       0.00000 |      77.30083 |       0.00476 |       0.68848
     -0.01735 |       0.00000 |      73.72802 |       0.00796 |       0.68535
     -0.01809 |       0.00000 |      69.34630 |       0.00898 |       0.68435
     -0.01884 |       0.00000 |      64.34885 |       0.00893 |       0.68439
     -0.01994 |       0.00000 |      59.04557 |       0.00894 |       0.68437
     -0.02122 |       0.00000 |      53.68295 |       0.00951 |       0.68382
     -0.02245 |       0.00000 |      48.39749 |       0.00988 |       0.68344
     -0.02386 |       0.00000 |      43.32130 |       0.01025 |       0.68308
Evaluating losses...
     -0.02470 |       0.00000 |      40.87261 |       0.01052 |       0.68281
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01151 |       0.00000 |      82.19637 |       0.00466 |       0.68843
     -0.01697 |       0.00000 |      69.26693 |       0.00862 |       0.68449
     -0.01847 |       0.00000 |      43.15651 |       0.00810 |       0.68498
     -0.02042 |       0.00000 |      21.62543 |       0.00882 |       0.68426
     -0.02385 |       0.00000 |      15.40984 |       0.00976 |       0.68332
     -0.02669 |       0.00000 |      14.03332 |       0.01158 |       0.68152
     -0.02902 |       0.00000 |      12.81120 |       0.01290 |       0.68022
     -0.03013 |       0.00000 |      11.46009 |       0.01357 |       0.67956
     -0.03071 |       0.00000 |      10.29148 |       0.01412 |       0.67902
     -0.03112 |       0.00000 |       9.51243 |       0.01464 |       0.67850
Evaluating losses...
     -0.03148 |       0.00000 |       9.21034 |       0.01345 |       0.67967
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00117 |       0.00000 |      86.81593 |      5.70e-05 |       0.69309
     -0.00476 |       0.00000 |      84.35383 |       0.00081 |       0.69235
     -0.01052 |       0.00000 |      81.42336 |       0.00322 |       0.68997
     -0.01426 |       0.00000 |      77.88359 |       0.00746 |       0.68581
     -0.01549 |       0.00000 |      73.71684 |       0.00815 |       0.68512
     -0.01650 |       0.00000 |      69.02155 |       0.00901 |       0.68428
     -0.01771 |       0.00000 |      64.00787 |       0.00884 |       0.68444
     -0.01924 |       0.00000 |      58.87017 |       0.00922 |       0.68405
     -0.02056 |       0.00000 |      53.73335 |       0.00987 |       0.68341
     -0.02202 |       0.00000 |      48.65154 |       0.01072 |       0.68258
Evaluating losses...
     -0.02289 |       0.00000 |      46.11890 |       0.01137 |       0.68194
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01008 |       0.00000 |      86.42735 |       0.00337 |       0.68985
     -0.01729 |       0.00000 |      72.95591 |       0.00876 |       0.68456
     -0.01936 |       0.00000 |      46.10416 |       0.00863 |       0.68468
     -0.02200 |       0.00000 |      23.51870 |       0.00910 |       0.68420
     -0.02532 |       0.00000 |      16.77666 |       0.01008 |       0.68323
     -0.02801 |       0.00000 |      15.50128 |       0.01210 |       0.68125
     -0.02960 |       0.00000 |      14.53804 |       0.01312 |       0.68024
     -0.03078 |       0.00000 |      13.41074 |       0.01375 |       0.67962
     -0.03156 |       0.00000 |      12.31178 |       0.01409 |       0.67929
     -0.03200 |       0.00000 |      11.42890 |       0.01475 |       0.67865
Evaluating losses...
     -0.03264 |       0.00000 |      11.07090 |       0.01429 |       0.67909
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00103 |       0.00000 |      88.86865 |      4.96e-05 |       0.69310
     -0.00429 |       0.00000 |      86.14326 |       0.00056 |       0.69260
     -0.00990 |       0.00000 |      82.99522 |       0.00290 |       0.69028
     -0.01422 |       0.00000 |      79.07030 |       0.00624 |       0.68700
     -0.01582 |       0.00000 |      74.43436 |       0.00832 |       0.68495
     -0.01660 |       0.00000 |      69.28665 |       0.00821 |       0.68505
     -0.01726 |       0.00000 |      63.78962 |       0.00913 |       0.68416
     -0.01852 |       0.00000 |      58.11999 |       0.00910 |       0.68417
     -0.01965 |       0.00000 |      52.50151 |       0.00927 |       0.68400
     -0.02110 |       0.00000 |      47.01829 |       0.00995 |       0.68333
Evaluating losses...
     -0.02184 |       0.00000 |      44.31744 |       0.01001 |       0.68326
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01317 |       0.00000 |      81.84148 |       0.00517 |       0.68811
     -0.01930 |       0.00000 |      69.45826 |       0.00911 |       0.68428
     -0.02093 |       0.00000 |      44.23819 |       0.00885 |       0.68452
     -0.02341 |       0.00000 |      22.61954 |       0.00933 |       0.68403
     -0.02639 |       0.00000 |      15.86472 |       0.01076 |       0.68262
     -0.02862 |       0.00000 |      14.40205 |       0.01209 |       0.68132
     -0.03025 |       0.00000 |      13.18725 |       0.01329 |       0.68014
     -0.03100 |       0.00000 |      11.87077 |       0.01384 |       0.67960
     -0.03154 |       0.00000 |      10.74386 |       0.01428 |       0.67917
     -0.03192 |       0.00000 |       9.96746 |       0.01473 |       0.67873
Evaluating losses...
     -0.03240 |       0.00000 |       9.65821 |       0.01434 |       0.67911
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00143 |       0.00000 |      92.57156 |      9.81e-05 |       0.69304
     -0.00554 |       0.00000 |      89.35051 |       0.00111 |       0.69201
     -0.01196 |       0.00000 |      85.44775 |       0.00412 |       0.68899
     -0.01482 |       0.00000 |      80.79097 |       0.00781 |       0.68534
     -0.01554 |       0.00000 |      75.47551 |       0.00838 |       0.68477
     -0.01594 |       0.00000 |      69.61095 |       0.00849 |       0.68467
     -0.01637 |       0.00000 |      63.44654 |       0.00887 |       0.68429
     -0.01699 |       0.00000 |      57.19914 |       0.00851 |       0.68464
     -0.01766 |       0.00000 |      51.02223 |       0.00911 |       0.68405
     -0.01858 |       0.00000 |      45.11502 |       0.00910 |       0.68405
Evaluating losses...
     -0.01942 |       0.00000 |      42.23765 |       0.00894 |       0.68420
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00755 |       0.00000 |      90.12591 |       0.00368 |       0.68959
     -0.01375 |       0.00000 |      76.99500 |       0.00768 |       0.68571
     -0.01596 |       0.00000 |      50.19967 |       0.00795 |       0.68544
     -0.01874 |       0.00000 |      25.60778 |       0.00871 |       0.68471
     -0.02158 |       0.00000 |      17.15874 |       0.00986 |       0.68358
     -0.02384 |       0.00000 |      15.43880 |       0.01130 |       0.68217
     -0.02518 |       0.00000 |      14.32393 |       0.01173 |       0.68174
     -0.02695 |       0.00000 |      13.11457 |       0.01267 |       0.68082
     -0.02794 |       0.00000 |      11.97528 |       0.01290 |       0.68059
     -0.02860 |       0.00000 |      11.07074 |       0.01288 |       0.68060
Evaluating losses...
     -0.02920 |       0.00000 |      10.70207 |       0.01254 |       0.68093
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00160 |       0.00000 |      87.78650 |       0.00013 |       0.69301
     -0.00744 |       0.00000 |      84.94485 |       0.00165 |       0.69146
     -0.01181 |       0.00000 |      81.52028 |       0.00475 |       0.68838
     -0.01413 |       0.00000 |      77.23489 |       0.00597 |       0.68720
     -0.01632 |       0.00000 |      72.12130 |       0.00765 |       0.68557
     -0.01729 |       0.00000 |      66.43716 |       0.00879 |       0.68446
     -0.01817 |       0.00000 |      60.34669 |       0.00845 |       0.68479
     -0.01892 |       0.00000 |      54.27093 |       0.00902 |       0.68421
     -0.01995 |       0.00000 |      48.35911 |       0.00944 |       0.68380
     -0.02108 |       0.00000 |      42.80521 |       0.00930 |       0.68392
Evaluating losses...
     -0.02187 |       0.00000 |      40.13490 |       0.00998 |       0.68325
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01157 |       0.00000 |      93.38070 |       0.00445 |       0.68887
     -0.01824 |       0.00000 |      79.58040 |       0.00798 |       0.68545
     -0.01970 |       0.00000 |      51.40902 |       0.00828 |       0.68515
     -0.02195 |       0.00000 |      26.18246 |       0.00843 |       0.68499
     -0.02482 |       0.00000 |      17.53601 |       0.00974 |       0.68370
     -0.02740 |       0.00000 |      15.73325 |       0.01121 |       0.68226
     -0.02846 |       0.00000 |      14.55217 |       0.01237 |       0.68112
     -0.02932 |       0.00000 |      13.36258 |       0.01265 |       0.68085
     -0.02957 |       0.00000 |      12.32869 |       0.01333 |       0.68018
     -0.02990 |       0.00000 |      11.60425 |       0.01340 |       0.68012
Evaluating losses...
     -0.03022 |       0.00000 |      11.32777 |       0.01247 |       0.68101
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00148 |       0.00000 |      82.67216 |       0.00021 |       0.69295
     -0.00632 |       0.00000 |      79.75197 |       0.00156 |       0.69162
     -0.01165 |       0.00000 |      76.48232 |       0.00491 |       0.68832
     -0.01519 |       0.00000 |      72.67194 |       0.00670 |       0.68655
     -0.01720 |       0.00000 |      68.32646 |       0.00814 |       0.68513
     -0.01840 |       0.00000 |      63.55124 |       0.00909 |       0.68420
     -0.01947 |       0.00000 |      58.53049 |       0.00958 |       0.68372
     -0.02036 |       0.00000 |      53.38581 |       0.00987 |       0.68343
     -0.02180 |       0.00000 |      48.28053 |       0.01010 |       0.68320
     -0.02337 |       0.00000 |      43.28672 |       0.01035 |       0.68295
Evaluating losses...
     -0.02365 |       0.00000 |      40.80139 |       0.01079 |       0.68252
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01050 |       0.00000 |      88.34005 |       0.00379 |       0.68927
     -0.01691 |       0.00000 |      75.00797 |       0.00839 |       0.68465
     -0.01909 |       0.00000 |      47.44946 |       0.00822 |       0.68480
     -0.02184 |       0.00000 |      23.71519 |       0.00926 |       0.68377
     -0.02540 |       0.00000 |      16.39235 |       0.01050 |       0.68253
     -0.02755 |       0.00000 |      14.75217 |       0.01193 |       0.68111
     -0.02901 |       0.00000 |      13.43847 |       0.01269 |       0.68036
     -0.02983 |       0.00000 |      12.14450 |       0.01323 |       0.67984
     -0.02979 |       0.00000 |      11.06228 |       0.01320 |       0.67987
     -0.03061 |       0.00000 |      10.31563 |       0.01404 |       0.67904
Evaluating losses...
     -0.03108 |       0.00000 |      10.03807 |       0.01332 |       0.67975
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00050 |       0.00000 |      84.54591 |      2.95e-05 |       0.69312
     -0.00210 |       0.00000 |      81.73817 |       0.00024 |       0.69290
     -0.00471 |       0.00000 |      78.37805 |       0.00110 |       0.69204
     -0.00858 |       0.00000 |      74.20887 |       0.00376 |       0.68940
     -0.01160 |       0.00000 |      69.34976 |       0.00684 |       0.68637
     -0.01267 |       0.00000 |      64.06535 |       0.00802 |       0.68520
     -0.01412 |       0.00000 |      58.50449 |       0.00833 |       0.68490
     -0.01591 |       0.00000 |      52.90482 |       0.00873 |       0.68449
     -0.01715 |       0.00000 |      47.40060 |       0.00980 |       0.68344
     -0.01840 |       0.00000 |      42.19008 |       0.01020 |       0.68304
Evaluating losses...
     -0.01946 |       0.00000 |      39.71388 |       0.01096 |       0.68229
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00555 |       0.00000 |      93.16779 |       0.00209 |       0.69104
     -0.01176 |       0.00000 |      79.66151 |       0.00754 |       0.68562
     -0.01332 |       0.00000 |      51.13955 |       0.00794 |       0.68522
     -0.01591 |       0.00000 |      25.84257 |       0.00835 |       0.68481
     -0.01924 |       0.00000 |      17.64361 |       0.00971 |       0.68345
     -0.02182 |       0.00000 |      16.17336 |       0.01086 |       0.68232
     -0.02414 |       0.00000 |      15.25464 |       0.01213 |       0.68107
     -0.02567 |       0.00000 |      14.14559 |       0.01271 |       0.68051
     -0.02665 |       0.00000 |      13.02340 |       0.01285 |       0.68036
     -0.02734 |       0.00000 |      12.07571 |       0.01299 |       0.68023
Evaluating losses...
     -0.02770 |       0.00000 |      11.68130 |       0.01188 |       0.68132
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00062 |       0.00000 |      83.32268 |      4.88e-05 |       0.69310
     -0.00309 |       0.00000 |      80.17848 |       0.00041 |       0.69273
     -0.00678 |       0.00000 |      76.31025 |       0.00185 |       0.69128
     -0.01138 |       0.00000 |      71.51294 |       0.00542 |       0.68773
     -0.01320 |       0.00000 |      65.89616 |       0.00827 |       0.68492
     -0.01406 |       0.00000 |      59.75692 |       0.00829 |       0.68490
     -0.01522 |       0.00000 |      53.40871 |       0.00885 |       0.68434
     -0.01631 |       0.00000 |      47.10310 |       0.00897 |       0.68422
     -0.01757 |       0.00000 |      41.16231 |       0.00960 |       0.68360
     -0.01895 |       0.00000 |      35.72587 |       0.01032 |       0.68288
Evaluating losses...
     -0.01971 |       0.00000 |      33.19632 |       0.01052 |       0.68268
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01035 |       0.00000 |      85.03572 |       0.00429 |       0.68892
     -0.01704 |       0.00000 |      71.37325 |       0.00843 |       0.68485
     -0.01888 |       0.00000 |      44.75749 |       0.00825 |       0.68501
     -0.02101 |       0.00000 |      22.31024 |       0.00877 |       0.68450
     -0.02340 |       0.00000 |      15.57011 |       0.01022 |       0.68306
     -0.02531 |       0.00000 |      14.09953 |       0.01132 |       0.68199
     -0.02672 |       0.00000 |      12.86815 |       0.01236 |       0.68097
     -0.02777 |       0.00000 |      11.62395 |       0.01280 |       0.68053
     -0.02858 |       0.00000 |      10.60992 |       0.01328 |       0.68005
     -0.02904 |       0.00000 |       9.93414 |       0.01369 |       0.67965
Evaluating losses...
     -0.02906 |       0.00000 |       9.67275 |       0.01533 |       0.67806
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00218 |       0.00000 |      87.57198 |       0.00015 |       0.69301
     -0.00860 |       0.00000 |      85.06114 |       0.00170 |       0.69147
     -0.01638 |       0.00000 |      81.96168 |       0.00572 |       0.68752
     -0.01876 |       0.00000 |      78.16972 |       0.00918 |       0.68414
     -0.01960 |       0.00000 |      73.73335 |       0.00925 |       0.68407
     -0.02033 |       0.00000 |      68.72253 |       0.00926 |       0.68406
     -0.02166 |       0.00000 |      63.17629 |       0.00948 |       0.68383
     -0.02272 |       0.00000 |      57.31128 |       0.00956 |       0.68374
     -0.02414 |       0.00000 |      51.44513 |       0.01060 |       0.68272
     -0.02550 |       0.00000 |      45.83947 |       0.01085 |       0.68248
Evaluating losses...
     -0.02603 |       0.00000 |      43.12706 |       0.01122 |       0.68211
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00886 |       0.00000 |      90.68262 |       0.00353 |       0.68975
     -0.01653 |       0.00000 |      76.56348 |       0.00834 |       0.68509
     -0.01816 |       0.00000 |      48.03869 |       0.00815 |       0.68527
     -0.02049 |       0.00000 |      24.30190 |       0.00890 |       0.68453
     -0.02345 |       0.00000 |      17.12639 |       0.00982 |       0.68363
     -0.02551 |       0.00000 |      15.70752 |       0.01116 |       0.68232
     -0.02758 |       0.00000 |      14.64340 |       0.01251 |       0.68100
     -0.02910 |       0.00000 |      13.40118 |       0.01306 |       0.68046
     -0.02994 |       0.00000 |      12.22932 |       0.01358 |       0.67995
     -0.03059 |       0.00000 |      11.30037 |       0.01369 |       0.67984
Evaluating losses...
     -0.03102 |       0.00000 |      10.91669 |       0.01373 |       0.67979
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00065 |       0.00000 |      92.93278 |      4.22e-05 |       0.69311
     -0.00342 |       0.00000 |      90.14835 |       0.00040 |       0.69273
     -0.00842 |       0.00000 |      86.87820 |       0.00239 |       0.69072
     -0.01337 |       0.00000 |      82.88710 |       0.00641 |       0.68674
     -0.01461 |       0.00000 |      78.10151 |       0.00838 |       0.68481
     -0.01565 |       0.00000 |      72.73936 |       0.00850 |       0.68469
     -0.01707 |       0.00000 |      66.99105 |       0.00831 |       0.68488
     -0.01906 |       0.00000 |      61.11649 |       0.00919 |       0.68401
     -0.02087 |       0.00000 |      55.25866 |       0.00991 |       0.68330
     -0.02174 |       0.00000 |      49.63628 |       0.01035 |       0.68286
Evaluating losses...
     -0.02270 |       0.00000 |      46.93147 |       0.01036 |       0.68286
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00966 |       0.00000 |      83.29076 |       0.00449 |       0.68857
     -0.01601 |       0.00000 |      70.82744 |       0.00843 |       0.68461
     -0.01742 |       0.00000 |      45.33288 |       0.00818 |       0.68485
     -0.01949 |       0.00000 |      22.88394 |       0.00900 |       0.68403
     -0.02202 |       0.00000 |      15.97443 |       0.00984 |       0.68319
     -0.02411 |       0.00000 |      14.55679 |       0.01136 |       0.68168
     -0.02581 |       0.00000 |      13.41039 |       0.01209 |       0.68096
     -0.02652 |       0.00000 |      12.17602 |       0.01280 |       0.68026
     -0.02742 |       0.00000 |      11.06615 |       0.01334 |       0.67972
     -0.02800 |       0.00000 |      10.25057 |       0.01336 |       0.67971
Evaluating losses...
     -0.02780 |       0.00000 |       9.92878 |       0.01583 |       0.67728
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00092 |       0.00000 |      82.15552 |      6.00e-05 |       0.69308
     -0.00456 |       0.00000 |      79.38806 |       0.00055 |       0.69258
     -0.01083 |       0.00000 |      76.13938 |       0.00306 |       0.69007
     -0.01622 |       0.00000 |      72.24685 |       0.00729 |       0.68587
     -0.01704 |       0.00000 |      67.72958 |       0.00893 |       0.68427
     -0.01760 |       0.00000 |      62.62695 |       0.00861 |       0.68457
     -0.01861 |       0.00000 |      57.23410 |       0.00893 |       0.68426
     -0.01985 |       0.00000 |      51.79244 |       0.00938 |       0.68381
     -0.02109 |       0.00000 |      46.47114 |       0.00953 |       0.68366
     -0.02237 |       0.00000 |      41.43890 |       0.00976 |       0.68342
Evaluating losses...
     -0.02280 |       0.00000 |      38.96920 |       0.01122 |       0.68199
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00793 |       0.00000 |      86.64021 |       0.00378 |       0.68930
     -0.01437 |       0.00000 |      73.30862 |       0.00742 |       0.68564
     -0.01646 |       0.00000 |      46.49519 |       0.00841 |       0.68465
     -0.01898 |       0.00000 |      23.59510 |       0.00880 |       0.68426
     -0.02188 |       0.00000 |      16.50873 |       0.00985 |       0.68321
     -0.02405 |       0.00000 |      15.07453 |       0.01117 |       0.68191
     -0.02622 |       0.00000 |      13.87229 |       0.01218 |       0.68091
     -0.02747 |       0.00000 |      12.55266 |       0.01245 |       0.68065
     -0.02818 |       0.00000 |      11.37253 |       0.01319 |       0.67992
     -0.02807 |       0.00000 |      10.54922 |       0.01299 |       0.68012
Evaluating losses...
     -0.02892 |       0.00000 |      10.23689 |       0.01152 |       0.68157
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00113 |       0.00000 |      92.29986 |      7.73e-05 |       0.69307
     -0.00518 |       0.00000 |      89.71243 |       0.00075 |       0.69243
     -0.01128 |       0.00000 |      86.70185 |       0.00337 |       0.68986
     -0.01534 |       0.00000 |      82.92664 |       0.00757 |       0.68575
     -0.01616 |       0.00000 |      78.31455 |       0.00890 |       0.68445
     -0.01676 |       0.00000 |      73.01691 |       0.00834 |       0.68499
     -0.01824 |       0.00000 |      67.20098 |       0.00909 |       0.68425
     -0.01978 |       0.00000 |      61.04557 |       0.00970 |       0.68366
     -0.02116 |       0.00000 |      54.87177 |       0.00954 |       0.68380
     -0.02250 |       0.00000 |      48.91386 |       0.01053 |       0.68283
Evaluating losses...
     -0.02369 |       0.00000 |      45.96093 |       0.01065 |       0.68271
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01145 |       0.00000 |      84.43867 |       0.00438 |       0.68879
     -0.01757 |       0.00000 |      71.92580 |       0.00825 |       0.68496
     -0.01886 |       0.00000 |      46.01248 |       0.00857 |       0.68464
     -0.02080 |       0.00000 |      22.96858 |       0.00905 |       0.68416
     -0.02391 |       0.00000 |      15.72140 |       0.00956 |       0.68364
     -0.02635 |       0.00000 |      14.46145 |       0.01133 |       0.68190
     -0.02800 |       0.00000 |      13.58731 |       0.01259 |       0.68066
     -0.02894 |       0.00000 |      12.58285 |       0.01309 |       0.68017
     -0.02993 |       0.00000 |      11.61616 |       0.01338 |       0.67988
     -0.02972 |       0.00000 |      10.84126 |       0.01415 |       0.67912
Evaluating losses...
     -0.03057 |       0.00000 |      10.51253 |       0.01367 |       0.67959
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00187 |       0.00000 |      91.90188 |      8.64e-05 |       0.69306
     -0.00862 |       0.00000 |      88.72911 |       0.00185 |       0.69130
     -0.01591 |       0.00000 |      84.95483 |       0.00526 |       0.68791
     -0.01966 |       0.00000 |      80.33218 |       0.00892 |       0.68431
     -0.02046 |       0.00000 |      74.93359 |       0.00903 |       0.68418
     -0.02148 |       0.00000 |      68.99663 |       0.01010 |       0.68314
     -0.02278 |       0.00000 |      62.82580 |       0.00955 |       0.68367
     -0.02390 |       0.00000 |      56.56760 |       0.00978 |       0.68344
     -0.02564 |       0.00000 |      50.49776 |       0.01048 |       0.68274
     -0.02700 |       0.00000 |      44.70416 |       0.01057 |       0.68265
Evaluating losses...
     -0.02747 |       0.00000 |      41.88190 |       0.01127 |       0.68196
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00998 |       0.00000 |      86.82157 |       0.00388 |       0.68921
     -0.01538 |       0.00000 |      73.08336 |       0.00788 |       0.68521
     -0.01686 |       0.00000 |      45.88579 |       0.00813 |       0.68496
     -0.01933 |       0.00000 |      23.32831 |       0.00865 |       0.68443
     -0.02194 |       0.00000 |      16.56345 |       0.01016 |       0.68294
     -0.02334 |       0.00000 |      15.14886 |       0.01160 |       0.68151
     -0.02527 |       0.00000 |      13.97928 |       0.01253 |       0.68059
     -0.02620 |       0.00000 |      12.71615 |       0.01277 |       0.68036
     -0.02704 |       0.00000 |      11.63198 |       0.01350 |       0.67964
     -0.02758 |       0.00000 |      10.82688 |       0.01333 |       0.67981
Evaluating losses...
     -0.02789 |       0.00000 |      10.51423 |       0.01279 |       0.68034
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00144 |       0.00000 |      87.27119 |      9.62e-05 |       0.69306
     -0.00612 |       0.00000 |      84.31818 |       0.00096 |       0.69221
     -0.01364 |       0.00000 |      80.94178 |       0.00428 |       0.68893
     -0.01716 |       0.00000 |      76.86590 |       0.00859 |       0.68471
     -0.01807 |       0.00000 |      72.21605 |       0.00897 |       0.68433
     -0.01912 |       0.00000 |      67.17062 |       0.00914 |       0.68416
     -0.02073 |       0.00000 |      61.88117 |       0.00946 |       0.68384
     -0.02214 |       0.00000 |      56.42513 |       0.00959 |       0.68370
     -0.02411 |       0.00000 |      50.99893 |       0.01045 |       0.68286
     -0.02571 |       0.00000 |      45.81181 |       0.01104 |       0.68228
Evaluating losses...
     -0.02667 |       0.00000 |      43.26185 |       0.01108 |       0.68224
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01036 |       0.00000 |      85.38599 |       0.00453 |       0.68849
     -0.01631 |       0.00000 |      71.84964 |       0.00809 |       0.68489
     -0.01778 |       0.00000 |      45.34545 |       0.00808 |       0.68490
     -0.02036 |       0.00000 |      23.15061 |       0.00848 |       0.68449
     -0.02369 |       0.00000 |      16.39988 |       0.01006 |       0.68292
     -0.02644 |       0.00000 |      14.91525 |       0.01190 |       0.68110
     -0.02823 |       0.00000 |      13.68265 |       0.01290 |       0.68010
     -0.02903 |       0.00000 |      12.36973 |       0.01358 |       0.67944
     -0.02993 |       0.00000 |      11.26917 |       0.01436 |       0.67867
     -0.03032 |       0.00000 |      10.44166 |       0.01454 |       0.67850
Evaluating losses...
     -0.03037 |       0.00000 |      10.12936 |       0.01642 |       0.67664
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00189 |       0.00000 |      89.04064 |       0.00012 |       0.69302
     -0.00757 |       0.00000 |      85.96806 |       0.00131 |       0.69181
     -0.01421 |       0.00000 |      82.51702 |       0.00460 |       0.68855
     -0.01752 |       0.00000 |      78.38688 |       0.00747 |       0.68570
     -0.01909 |       0.00000 |      73.60558 |       0.00862 |       0.68457
     -0.02014 |       0.00000 |      68.40968 |       0.00918 |       0.68401
     -0.02129 |       0.00000 |      62.96860 |       0.00897 |       0.68421
     -0.02273 |       0.00000 |      57.50697 |       0.00935 |       0.68383
     -0.02383 |       0.00000 |      52.09620 |       0.00976 |       0.68343
     -0.02555 |       0.00000 |      46.86465 |       0.01077 |       0.68243
Evaluating losses...
     -0.02642 |       0.00000 |      44.23167 |       0.00955 |       0.68362
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01109 |       0.00000 |      84.41344 |       0.00469 |       0.68867
     -0.01611 |       0.00000 |      72.67796 |       0.00788 |       0.68558
     -0.01713 |       0.00000 |      47.50085 |       0.00864 |       0.68485
     -0.01856 |       0.00000 |      24.22056 |       0.00818 |       0.68528
     -0.02139 |       0.00000 |      16.58832 |       0.00958 |       0.68391
     -0.02420 |       0.00000 |      15.05393 |       0.01111 |       0.68241
     -0.02554 |       0.00000 |      13.92655 |       0.01262 |       0.68094
     -0.02597 |       0.00000 |      12.73156 |       0.01314 |       0.68043
     -0.02717 |       0.00000 |      11.69737 |       0.01330 |       0.68027
     -0.02776 |       0.00000 |      10.95195 |       0.01367 |       0.67990
Evaluating losses...
     -0.02814 |       0.00000 |      10.66189 |       0.01323 |       0.68033
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00152 |       0.00000 |      85.84390 |      8.09e-05 |       0.69307
     -0.00631 |       0.00000 |      83.11345 |       0.00086 |       0.69229
     -0.01340 |       0.00000 |      79.94825 |       0.00417 |       0.68901
     -0.01730 |       0.00000 |      76.18337 |       0.00788 |       0.68535
     -0.01865 |       0.00000 |      71.81384 |       0.00872 |       0.68451
     -0.01947 |       0.00000 |      66.91420 |       0.00904 |       0.68420
     -0.02071 |       0.00000 |      61.66488 |       0.00928 |       0.68396
     -0.02190 |       0.00000 |      56.16942 |       0.00917 |       0.68406
     -0.02323 |       0.00000 |      50.66674 |       0.00977 |       0.68346
     -0.02462 |       0.00000 |      45.34885 |       0.01035 |       0.68289
Evaluating losses...
     -0.02516 |       0.00000 |      42.72345 |       0.01146 |       0.68180
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00907 |       0.00000 |      84.45172 |       0.00388 |       0.68942
     -0.01442 |       0.00000 |      71.87279 |       0.00827 |       0.68518
     -0.01591 |       0.00000 |      45.79955 |       0.00769 |       0.68574
     -0.01864 |       0.00000 |      23.08605 |       0.00867 |       0.68477
     -0.02142 |       0.00000 |      16.13846 |       0.01007 |       0.68340
     -0.02414 |       0.00000 |      14.68922 |       0.01166 |       0.68185
     -0.02592 |       0.00000 |      13.53870 |       0.01257 |       0.68094
     -0.02709 |       0.00000 |      12.35378 |       0.01306 |       0.68046
     -0.02780 |       0.00000 |      11.34033 |       0.01341 |       0.68012
     -0.02826 |       0.00000 |      10.58750 |       0.01354 |       0.67998
Evaluating losses...
     -0.02890 |       0.00000 |      10.30445 |       0.01470 |       0.67886
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00111 |       0.00000 |      88.78886 |      6.34e-05 |       0.69308
     -0.00444 |       0.00000 |      85.89194 |       0.00082 |       0.69232
     -0.00962 |       0.00000 |      82.48000 |       0.00338 |       0.68977
     -0.01308 |       0.00000 |      78.33346 |       0.00684 |       0.68636
     -0.01426 |       0.00000 |      73.45074 |       0.00831 |       0.68492
     -0.01489 |       0.00000 |      68.01080 |       0.00808 |       0.68514
     -0.01557 |       0.00000 |      62.20172 |       0.00829 |       0.68492
     -0.01648 |       0.00000 |      56.21609 |       0.00860 |       0.68461
     -0.01772 |       0.00000 |      50.28213 |       0.00936 |       0.68387
     -0.01826 |       0.00000 |      44.58400 |       0.00905 |       0.68417
Evaluating losses...
     -0.01917 |       0.00000 |      41.83926 |       0.01035 |       0.68290
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00906 |       0.00000 |      82.39948 |       0.00320 |       0.68989
     -0.01642 |       0.00000 |      69.59775 |       0.00830 |       0.68478
     -0.01875 |       0.00000 |      43.72654 |       0.00832 |       0.68475
     -0.02198 |       0.00000 |      22.04447 |       0.00983 |       0.68325
     -0.02559 |       0.00000 |      15.65634 |       0.01114 |       0.68195
     -0.02760 |       0.00000 |      14.28641 |       0.01263 |       0.68048
     -0.02901 |       0.00000 |      13.13403 |       0.01313 |       0.67999
     -0.02978 |       0.00000 |      11.91782 |       0.01340 |       0.67972
     -0.03018 |       0.00000 |      10.85070 |       0.01345 |       0.67968
     -0.03033 |       0.00000 |      10.07929 |       0.01353 |       0.67960
Evaluating losses...
     -0.03075 |       0.00000 |       9.78915 |       0.01272 |       0.68040
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00220 |       0.00000 |      82.65036 |       0.00020 |       0.69295
     -0.00871 |       0.00000 |      80.13529 |       0.00177 |       0.69141
     -0.01589 |       0.00000 |      77.12904 |       0.00578 |       0.68748
     -0.01800 |       0.00000 |      73.38066 |       0.00903 |       0.68431
     -0.01865 |       0.00000 |      68.93541 |       0.00937 |       0.68398
     -0.01933 |       0.00000 |      64.01041 |       0.00873 |       0.68460
     -0.02030 |       0.00000 |      58.74684 |       0.00963 |       0.68372
     -0.02187 |       0.00000 |      53.36419 |       0.00968 |       0.68366
     -0.02360 |       0.00000 |      48.10221 |       0.00998 |       0.68336
     -0.02474 |       0.00000 |      43.09307 |       0.01069 |       0.68266
Evaluating losses...
     -0.02591 |       0.00000 |      40.67635 |       0.01144 |       0.68193
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00798 |       0.00000 |      86.06012 |       0.00324 |       0.68985
     -0.01452 |       0.00000 |      73.23866 |       0.00781 |       0.68525
     -0.01626 |       0.00000 |      46.70647 |       0.00761 |       0.68544
     -0.01846 |       0.00000 |      23.41158 |       0.00843 |       0.68462
     -0.02135 |       0.00000 |      16.06581 |       0.01000 |       0.68306
     -0.02394 |       0.00000 |      14.49815 |       0.01122 |       0.68185
     -0.02578 |       0.00000 |      13.31151 |       0.01233 |       0.68075
     -0.02665 |       0.00000 |      12.09765 |       0.01296 |       0.68013
     -0.02730 |       0.00000 |      11.13941 |       0.01304 |       0.68005
     -0.02755 |       0.00000 |      10.49755 |       0.01349 |       0.67961
Evaluating losses...
     -0.02812 |       0.00000 |      10.26345 |       0.01280 |       0.68030
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00153 |       0.00000 |      92.78664 |      9.31e-05 |       0.69306
     -0.00608 |       0.00000 |      89.83807 |       0.00111 |       0.69204
     -0.01325 |       0.00000 |      86.34062 |       0.00486 |       0.68835
     -0.01601 |       0.00000 |      82.14304 |       0.00810 |       0.68516
     -0.01693 |       0.00000 |      77.27304 |       0.00915 |       0.68414
     -0.01751 |       0.00000 |      71.79948 |       0.00934 |       0.68395
     -0.01868 |       0.00000 |      65.70469 |       0.00923 |       0.68405
     -0.02000 |       0.00000 |      59.31097 |       0.00935 |       0.68393
     -0.02119 |       0.00000 |      52.97464 |       0.01005 |       0.68324
     -0.02239 |       0.00000 |      46.95592 |       0.01041 |       0.68288
Evaluating losses...
     -0.02272 |       0.00000 |      44.00436 |       0.01104 |       0.68227
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00903 |       0.00000 |      87.13664 |       0.00417 |       0.68902
     -0.01620 |       0.00000 |      73.84418 |       0.00785 |       0.68539
     -0.01763 |       0.00000 |      46.84117 |       0.00858 |       0.68467
     -0.01976 |       0.00000 |      23.75183 |       0.00892 |       0.68432
     -0.02288 |       0.00000 |      16.56990 |       0.00977 |       0.68348
     -0.02565 |       0.00000 |      15.04072 |       0.01136 |       0.68191
     -0.02709 |       0.00000 |      13.82777 |       0.01254 |       0.68076
     -0.02820 |       0.00000 |      12.52185 |       0.01317 |       0.68014
     -0.02881 |       0.00000 |      11.39775 |       0.01376 |       0.67956
     -0.02936 |       0.00000 |      10.61173 |       0.01367 |       0.67965
Evaluating losses...
     -0.02949 |       0.00000 |      10.30911 |       0.01545 |       0.67791
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00214 |       0.00000 |      88.74783 |       0.00016 |       0.69300
     -0.00834 |       0.00000 |      86.16808 |       0.00163 |       0.69156
     -0.01511 |       0.00000 |      83.07760 |       0.00487 |       0.68839
     -0.01849 |       0.00000 |      79.15213 |       0.00740 |       0.68591
     -0.01948 |       0.00000 |      74.40977 |       0.00942 |       0.68395
     -0.02033 |       0.00000 |      69.12064 |       0.00900 |       0.68434
     -0.02165 |       0.00000 |      63.43567 |       0.00913 |       0.68421
     -0.02287 |       0.00000 |      57.63133 |       0.00973 |       0.68362
     -0.02419 |       0.00000 |      51.93082 |       0.01021 |       0.68315
     -0.02541 |       0.00000 |      46.50481 |       0.01092 |       0.68244
Evaluating losses...
     -0.02591 |       0.00000 |      43.83649 |       0.01135 |       0.68203
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00840 |       0.00000 |      85.51215 |       0.00400 |       0.68911
     -0.01520 |       0.00000 |      72.80421 |       0.00738 |       0.68573
     -0.01718 |       0.00000 |      46.45718 |       0.00786 |       0.68526
     -0.02009 |       0.00000 |      23.27965 |       0.00907 |       0.68405
     -0.02289 |       0.00000 |      16.02235 |       0.01049 |       0.68264
     -0.02529 |       0.00000 |      14.50953 |       0.01203 |       0.68112
     -0.02641 |       0.00000 |      13.40009 |       0.01235 |       0.68080
     -0.02729 |       0.00000 |      12.21269 |       0.01303 |       0.68013
     -0.02818 |       0.00000 |      11.14872 |       0.01327 |       0.67990
     -0.02861 |       0.00000 |      10.34144 |       0.01345 |       0.67971
Evaluating losses...
     -0.02914 |       0.00000 |      10.01117 |       0.01350 |       0.67967
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00166 |       0.00000 |      90.01681 |      8.23e-05 |       0.69306
     -0.00692 |       0.00000 |      87.12061 |       0.00125 |       0.69190
     -0.01442 |       0.00000 |      83.58509 |       0.00457 |       0.68861
     -0.01805 |       0.00000 |      79.15704 |       0.00831 |       0.68492
     -0.01885 |       0.00000 |      73.88192 |       0.00914 |       0.68410
     -0.01929 |       0.00000 |      68.05437 |       0.00863 |       0.68460
     -0.01984 |       0.00000 |      61.93132 |       0.00932 |       0.68392
     -0.02081 |       0.00000 |      55.71634 |       0.00932 |       0.68391
     -0.02150 |       0.00000 |      49.70179 |       0.00922 |       0.68401
     -0.02263 |       0.00000 |      43.99254 |       0.00934 |       0.68389
Evaluating losses...
     -0.02333 |       0.00000 |      41.28985 |       0.00948 |       0.68374
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00986 |       0.00000 |      85.79297 |       0.00383 |       0.68961
     -0.01578 |       0.00000 |      72.96471 |       0.00806 |       0.68563
     -0.01742 |       0.00000 |      46.43739 |       0.00804 |       0.68564
     -0.01951 |       0.00000 |      23.46689 |       0.00853 |       0.68516
     -0.02161 |       0.00000 |      16.32078 |       0.00977 |       0.68396
     -0.02397 |       0.00000 |      14.87145 |       0.01088 |       0.68288
     -0.02550 |       0.00000 |      13.72407 |       0.01204 |       0.68176
     -0.02637 |       0.00000 |      12.51842 |       0.01270 |       0.68111
     -0.02752 |       0.00000 |      11.48017 |       0.01298 |       0.68084
     -0.02823 |       0.00000 |      10.76117 |       0.01337 |       0.68045
Evaluating losses...
     -0.02839 |       0.00000 |      10.47871 |       0.01219 |       0.68157
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00146 |       0.00000 |      86.55489 |       0.00013 |       0.69303
     -0.00613 |       0.00000 |      84.00195 |       0.00120 |       0.69198
     -0.01133 |       0.00000 |      81.03104 |       0.00440 |       0.68886
     -0.01430 |       0.00000 |      77.55093 |       0.00607 |       0.68722
     -0.01662 |       0.00000 |      73.50786 |       0.00743 |       0.68587
     -0.01778 |       0.00000 |      68.89212 |       0.00886 |       0.68446
     -0.01880 |       0.00000 |      63.74696 |       0.00919 |       0.68413
     -0.02028 |       0.00000 |      58.27221 |       0.00929 |       0.68403
     -0.02234 |       0.00000 |      52.66978 |       0.00972 |       0.68360
     -0.02373 |       0.00000 |      47.12836 |       0.01070 |       0.68264
Evaluating losses...
     -0.02465 |       0.00000 |      44.41477 |       0.01114 |       0.68221
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00943 |       0.00000 |      85.24906 |       0.00402 |       0.68907
     -0.01504 |       0.00000 |      72.22282 |       0.00808 |       0.68500
     -0.01659 |       0.00000 |      46.01956 |       0.00809 |       0.68498
     -0.01930 |       0.00000 |      23.09699 |       0.00882 |       0.68425
     -0.02199 |       0.00000 |      15.88735 |       0.01000 |       0.68308
     -0.02420 |       0.00000 |      14.40310 |       0.01142 |       0.68168
     -0.02579 |       0.00000 |      13.28885 |       0.01217 |       0.68093
     -0.02637 |       0.00000 |      12.09821 |       0.01303 |       0.68008
     -0.02740 |       0.00000 |      11.07673 |       0.01306 |       0.68005
     -0.02778 |       0.00000 |      10.33180 |       0.01357 |       0.67956
Evaluating losses...
     -0.02841 |       0.00000 |      10.04632 |       0.01346 |       0.67967
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00122 |       0.00000 |      89.24196 |      9.03e-05 |       0.69305
     -0.00524 |       0.00000 |      86.66640 |       0.00112 |       0.69202
     -0.01158 |       0.00000 |      83.45370 |       0.00358 |       0.68959
     -0.01531 |       0.00000 |      79.37328 |       0.00719 |       0.68605
     -0.01623 |       0.00000 |      74.53493 |       0.00856 |       0.68472
     -0.01723 |       0.00000 |      69.09460 |       0.00887 |       0.68441
     -0.01852 |       0.00000 |      63.36459 |       0.00903 |       0.68424
     -0.01924 |       0.00000 |      57.57689 |       0.00950 |       0.68378
     -0.02041 |       0.00000 |      51.91459 |       0.00941 |       0.68386
     -0.02142 |       0.00000 |      46.44329 |       0.00999 |       0.68328
Evaluating losses...
     -0.02211 |       0.00000 |      43.75854 |       0.01129 |       0.68201
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00942 |       0.00000 |      87.25423 |       0.00455 |       0.68863
     -0.01683 |       0.00000 |      74.25928 |       0.00744 |       0.68576
     -0.01829 |       0.00000 |      47.56697 |       0.00872 |       0.68451
     -0.02092 |       0.00000 |      24.24417 |       0.00886 |       0.68436
     -0.02411 |       0.00000 |      16.60412 |       0.01014 |       0.68309
     -0.02642 |       0.00000 |      14.93688 |       0.01184 |       0.68142
     -0.02803 |       0.00000 |      13.54546 |       0.01255 |       0.68071
     -0.02867 |       0.00000 |      12.17951 |       0.01356 |       0.67972
     -0.02931 |       0.00000 |      11.12272 |       0.01350 |       0.67977
     -0.03014 |       0.00000 |      10.42105 |       0.01413 |       0.67915
Evaluating losses...
     -0.03013 |       0.00000 |      10.16165 |       0.01244 |       0.68081
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00169 |       0.00000 |      79.72986 |      7.95e-05 |       0.69306
     -0.00741 |       0.00000 |      77.15208 |       0.00106 |       0.69204
     -0.01528 |       0.00000 |      74.04287 |       0.00521 |       0.68789
     -0.01885 |       0.00000 |      70.02494 |       0.00822 |       0.68493
     -0.01974 |       0.00000 |      65.33147 |       0.00957 |       0.68360
     -0.02051 |       0.00000 |      60.22167 |       0.00981 |       0.68336
     -0.02106 |       0.00000 |      54.91517 |       0.00928 |       0.68387
     -0.02214 |       0.00000 |      49.56942 |       0.01008 |       0.68308
     -0.02360 |       0.00000 |      44.44203 |       0.00961 |       0.68354
     -0.02479 |       0.00000 |      39.62210 |       0.01059 |       0.68257
Evaluating losses...
     -0.02550 |       0.00000 |      37.29400 |       0.01112 |       0.68205
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01066 |       0.00000 |      85.05210 |       0.00476 |       0.68842
     -0.01593 |       0.00000 |      71.99728 |       0.00787 |       0.68533
     -0.01722 |       0.00000 |      45.76023 |       0.00818 |       0.68502
     -0.01985 |       0.00000 |      23.23686 |       0.00933 |       0.68388
     -0.02353 |       0.00000 |      16.28624 |       0.01052 |       0.68270
     -0.02604 |       0.00000 |      14.81487 |       0.01229 |       0.68095
     -0.02722 |       0.00000 |      13.64536 |       0.01307 |       0.68019
     -0.02802 |       0.00000 |      12.40197 |       0.01365 |       0.67961
     -0.02830 |       0.00000 |      11.34664 |       0.01369 |       0.67958
     -0.02885 |       0.00000 |      10.58455 |       0.01398 |       0.67929
Evaluating losses...
     -0.02916 |       0.00000 |      10.29528 |       0.01471 |       0.67857
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00074 |       0.00000 |      86.66695 |      3.50e-05 |       0.69311
     -0.00346 |       0.00000 |      84.13883 |       0.00045 |       0.69270
     -0.00798 |       0.00000 |      80.88146 |       0.00257 |       0.69059
     -0.01146 |       0.00000 |      76.81142 |       0.00495 |       0.68824
     -0.01409 |       0.00000 |      71.99191 |       0.00697 |       0.68625
     -0.01526 |       0.00000 |      66.67894 |       0.00882 |       0.68443
     -0.01604 |       0.00000 |      61.03560 |       0.00847 |       0.68477
     -0.01751 |       0.00000 |      55.16572 |       0.00918 |       0.68407
     -0.01895 |       0.00000 |      49.32446 |       0.00941 |       0.68383
     -0.02021 |       0.00000 |      43.73243 |       0.01038 |       0.68288
Evaluating losses...
     -0.02134 |       0.00000 |      40.98227 |       0.00998 |       0.68326
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00728 |       0.00000 |      90.18010 |       0.00371 |       0.68954
     -0.01343 |       0.00000 |      76.45061 |       0.00716 |       0.68617
     -0.01553 |       0.00000 |      48.64346 |       0.00825 |       0.68510
     -0.01875 |       0.00000 |      25.02483 |       0.00888 |       0.68447
     -0.02252 |       0.00000 |      17.50279 |       0.01063 |       0.68276
     -0.02558 |       0.00000 |      15.78311 |       0.01217 |       0.68124
     -0.02745 |       0.00000 |      14.46000 |       0.01275 |       0.68067
     -0.02872 |       0.00000 |      13.10266 |       0.01366 |       0.67978
     -0.02974 |       0.00000 |      11.93731 |       0.01372 |       0.67971
     -0.03024 |       0.00000 |      11.12024 |       0.01428 |       0.67917
Evaluating losses...
     -0.03078 |       0.00000 |      10.78565 |       0.01434 |       0.67910
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00186 |       0.00000 |      92.56045 |       0.00015 |       0.69299
     -0.00810 |       0.00000 |      89.64294 |       0.00113 |       0.69203
     -0.01558 |       0.00000 |      86.30301 |       0.00490 |       0.68832
     -0.01937 |       0.00000 |      82.25626 |       0.00795 |       0.68535
     -0.02063 |       0.00000 |      77.50629 |       0.00940 |       0.68393
     -0.02187 |       0.00000 |      72.09576 |       0.00954 |       0.68379
     -0.02324 |       0.00000 |      66.24160 |       0.00952 |       0.68380
     -0.02489 |       0.00000 |      60.19678 |       0.01038 |       0.68295
     -0.02628 |       0.00000 |      54.18983 |       0.01040 |       0.68292
     -0.02782 |       0.00000 |      48.39282 |       0.01110 |       0.68223
Evaluating losses...
     -0.02857 |       0.00000 |      45.53265 |       0.01147 |       0.68187
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00833 |       0.00000 |      82.69453 |       0.00437 |       0.68883
     -0.01483 |       0.00000 |      70.26799 |       0.00795 |       0.68530
     -0.01623 |       0.00000 |      44.98674 |       0.00850 |       0.68476
     -0.01850 |       0.00000 |      22.87920 |       0.00914 |       0.68412
     -0.02152 |       0.00000 |      16.13530 |       0.01054 |       0.68274
     -0.02463 |       0.00000 |      14.57977 |       0.01200 |       0.68130
     -0.02614 |       0.00000 |      13.23693 |       0.01325 |       0.68007
     -0.02693 |       0.00000 |      11.86619 |       0.01371 |       0.67962
     -0.02731 |       0.00000 |      10.81356 |       0.01376 |       0.67957
     -0.02791 |       0.00000 |      10.09727 |       0.01442 |       0.67892
Evaluating losses...
     -0.02811 |       0.00000 |       9.82866 |       0.01340 |       0.67992
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00152 |       0.00000 |      87.74399 |       0.00015 |       0.69299
     -0.00648 |       0.00000 |      85.22046 |       0.00124 |       0.69191
     -0.01240 |       0.00000 |      81.92066 |       0.00422 |       0.68897
     -0.01701 |       0.00000 |      77.79312 |       0.00754 |       0.68572
     -0.01822 |       0.00000 |      72.90531 |       0.00908 |       0.68422
     -0.01924 |       0.00000 |      67.41311 |       0.00919 |       0.68410
     -0.02074 |       0.00000 |      61.54400 |       0.00924 |       0.68405
     -0.02246 |       0.00000 |      55.59412 |       0.00959 |       0.68369
     -0.02408 |       0.00000 |      49.75535 |       0.01017 |       0.68312
     -0.02589 |       0.00000 |      44.22805 |       0.01095 |       0.68235
Evaluating losses...
     -0.02622 |       0.00000 |      41.55143 |       0.01063 |       0.68266
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01357 |       0.00000 |      94.75963 |       0.00524 |       0.68799
     -0.01990 |       0.00000 |      81.29287 |       0.00844 |       0.68483
     -0.02145 |       0.00000 |      53.11887 |       0.00842 |       0.68484
     -0.02422 |       0.00000 |      27.06645 |       0.00916 |       0.68410
     -0.02728 |       0.00000 |      17.88453 |       0.01086 |       0.68243
     -0.02927 |       0.00000 |      15.89717 |       0.01230 |       0.68101
     -0.03037 |       0.00000 |      14.80519 |       0.01276 |       0.68056
     -0.03154 |       0.00000 |      13.57067 |       0.01375 |       0.67959
     -0.03215 |       0.00000 |      12.39954 |       0.01379 |       0.67954
     -0.03237 |       0.00000 |      11.47249 |       0.01389 |       0.67944
Evaluating losses...
     -0.03309 |       0.00000 |      11.10984 |       0.01428 |       0.67906
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00203 |       0.00000 |      79.41728 |       0.00016 |       0.69299
     -0.00798 |       0.00000 |      77.06290 |       0.00146 |       0.69170
     -0.01545 |       0.00000 |      74.14012 |       0.00587 |       0.68734
     -0.01769 |       0.00000 |      70.33366 |       0.00843 |       0.68483
     -0.01860 |       0.00000 |      65.68487 |       0.00911 |       0.68416
     -0.01943 |       0.00000 |      60.34314 |       0.00933 |       0.68394
     -0.02078 |       0.00000 |      54.71625 |       0.00993 |       0.68334
     -0.02209 |       0.00000 |      49.03238 |       0.00947 |       0.68379
     -0.02351 |       0.00000 |      43.57620 |       0.01057 |       0.68271
     -0.02472 |       0.00000 |      38.46797 |       0.01076 |       0.68251
Evaluating losses...
     -0.02584 |       0.00000 |      36.03527 |       0.01147 |       0.68182
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01158 |       0.00000 |      89.26398 |       0.00469 |       0.68845
     -0.01703 |       0.00000 |      75.51262 |       0.00829 |       0.68487
     -0.01871 |       0.00000 |      47.59376 |       0.00815 |       0.68500
     -0.02131 |       0.00000 |      23.73598 |       0.00921 |       0.68395
     -0.02460 |       0.00000 |      16.31974 |       0.01001 |       0.68314
     -0.02729 |       0.00000 |      14.68396 |       0.01135 |       0.68183
     -0.02903 |       0.00000 |      13.43940 |       0.01245 |       0.68074
     -0.03010 |       0.00000 |      12.22043 |       0.01332 |       0.67988
     -0.03060 |       0.00000 |      11.24926 |       0.01339 |       0.67981
     -0.03131 |       0.00000 |      10.57356 |       0.01375 |       0.67946
Evaluating losses...
     -0.03179 |       0.00000 |      10.31641 |       0.01318 |       0.68001
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00159 |       0.00000 |      91.20314 |      8.46e-05 |       0.69307
     -0.00668 |       0.00000 |      87.91050 |       0.00090 |       0.69226
     -0.01413 |       0.00000 |      84.01768 |       0.00376 |       0.68944
     -0.01767 |       0.00000 |      79.37942 |       0.00810 |       0.68519
     -0.01887 |       0.00000 |      74.04131 |       0.00881 |       0.68448
     -0.01967 |       0.00000 |      68.16748 |       0.00879 |       0.68449
     -0.02097 |       0.00000 |      62.04876 |       0.00962 |       0.68368
     -0.02272 |       0.00000 |      55.88336 |       0.00949 |       0.68379
     -0.02440 |       0.00000 |      49.87178 |       0.00989 |       0.68339
     -0.02634 |       0.00000 |      44.24851 |       0.01066 |       0.68263
Evaluating losses...
     -0.02736 |       0.00000 |      41.52361 |       0.01205 |       0.68127
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00843 |       0.00000 |      79.11314 |       0.00349 |       0.68952
     -0.01401 |       0.00000 |      65.81477 |       0.00793 |       0.68499
     -0.01571 |       0.00000 |      40.60309 |       0.00828 |       0.68463
     -0.01821 |       0.00000 |      20.92135 |       0.00903 |       0.68387
     -0.02093 |       0.00000 |      15.59072 |       0.01047 |       0.68243
     -0.02359 |       0.00000 |      14.34734 |       0.01211 |       0.68080
     -0.02502 |       0.00000 |      13.17979 |       0.01294 |       0.67998
     -0.02587 |       0.00000 |      11.84590 |       0.01353 |       0.67940
     -0.02663 |       0.00000 |      10.64370 |       0.01384 |       0.67909
     -0.02681 |       0.00000 |       9.77088 |       0.01366 |       0.67929
Evaluating losses...
     -0.02731 |       0.00000 |       9.43257 |       0.01418 |       0.67876
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00187 |       0.00000 |      88.61713 |       0.00011 |       0.69303
     -0.00862 |       0.00000 |      85.90702 |       0.00165 |       0.69147
     -0.01714 |       0.00000 |      82.67377 |       0.00552 |       0.68762
     -0.02056 |       0.00000 |      78.68813 |       0.00881 |       0.68438
     -0.02112 |       0.00000 |      73.98128 |       0.00964 |       0.68357
     -0.02209 |       0.00000 |      68.65023 |       0.01000 |       0.68321
     -0.02323 |       0.00000 |      62.83284 |       0.00976 |       0.68345
     -0.02474 |       0.00000 |      56.76096 |       0.00929 |       0.68390
     -0.02598 |       0.00000 |      50.67999 |       0.01037 |       0.68283
     -0.02785 |       0.00000 |      44.90030 |       0.01086 |       0.68235
Evaluating losses...
     -0.02883 |       0.00000 |      42.15079 |       0.01091 |       0.68229
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00758 |       0.00000 |      86.72017 |       0.00278 |       0.69044
     -0.01211 |       0.00000 |      73.26228 |       0.00786 |       0.68547
     -0.01346 |       0.00000 |      46.40466 |       0.00727 |       0.68603
     -0.01518 |       0.00000 |      23.64563 |       0.00788 |       0.68544
     -0.01668 |       0.00000 |      16.41652 |       0.00884 |       0.68450
     -0.01866 |       0.00000 |      15.04733 |       0.00946 |       0.68388
     -0.02061 |       0.00000 |      14.21743 |       0.01080 |       0.68257
     -0.02212 |       0.00000 |      13.24893 |       0.01177 |       0.68162
     -0.02310 |       0.00000 |      12.25443 |       0.01235 |       0.68105
     -0.02358 |       0.00000 |      11.37725 |       0.01231 |       0.68108
Evaluating losses...
     -0.02419 |       0.00000 |      10.99178 |       0.01391 |       0.67953
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00137 |       0.00000 |      92.50559 |      5.79e-05 |       0.69308
     -0.00642 |       0.00000 |      89.38921 |       0.00089 |       0.69223
     -0.01394 |       0.00000 |      85.45920 |       0.00444 |       0.68869
     -0.01774 |       0.00000 |      80.47161 |       0.00811 |       0.68507
     -0.01826 |       0.00000 |      74.76327 |       0.00883 |       0.68438
     -0.01913 |       0.00000 |      68.48612 |       0.00903 |       0.68418
     -0.01998 |       0.00000 |      62.02340 |       0.00883 |       0.68438
     -0.02091 |       0.00000 |      55.65561 |       0.00972 |       0.68349
     -0.02216 |       0.00000 |      49.53207 |       0.00900 |       0.68419
     -0.02322 |       0.00000 |      43.81945 |       0.01012 |       0.68310
Evaluating losses...
     -0.02382 |       0.00000 |      41.07608 |       0.00924 |       0.68395
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00676 |       0.00000 |      88.50479 |       0.00339 |       0.68987
     -0.01276 |       0.00000 |      74.16421 |       0.00688 |       0.68649
     -0.01408 |       0.00000 |      45.95168 |       0.00785 |       0.68554
     -0.01622 |       0.00000 |      23.20873 |       0.00818 |       0.68521
     -0.01930 |       0.00000 |      16.68135 |       0.00971 |       0.68372
     -0.02251 |       0.00000 |      15.36246 |       0.01146 |       0.68200
     -0.02442 |       0.00000 |      14.28770 |       0.01237 |       0.68111
     -0.02588 |       0.00000 |      13.05773 |       0.01342 |       0.68008
     -0.02669 |       0.00000 |      11.92596 |       0.01365 |       0.67985
     -0.02712 |       0.00000 |      11.04947 |       0.01394 |       0.67956
Evaluating losses...
     -0.02687 |       0.00000 |      10.69531 |       0.01152 |       0.68191
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00268 |       0.00000 |      83.83938 |       0.00020 |       0.69294
     -0.01057 |       0.00000 |      80.93345 |       0.00202 |       0.69112
     -0.01815 |       0.00000 |      77.54579 |       0.00696 |       0.68622
     -0.02010 |       0.00000 |      73.46679 |       0.00899 |       0.68422
     -0.02056 |       0.00000 |      68.68772 |       0.00947 |       0.68375
     -0.02124 |       0.00000 |      63.51662 |       0.00979 |       0.68343
     -0.02215 |       0.00000 |      58.11944 |       0.00946 |       0.68375
     -0.02339 |       0.00000 |      52.67985 |       0.00982 |       0.68340
     -0.02458 |       0.00000 |      47.34679 |       0.00989 |       0.68332
     -0.02604 |       0.00000 |      42.26043 |       0.01016 |       0.68305
Evaluating losses...
     -0.02669 |       0.00000 |      39.81181 |       0.01090 |       0.68232
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01116 |       0.00000 |      83.63636 |       0.00450 |       0.68880
     -0.01759 |       0.00000 |      71.36436 |       0.00868 |       0.68474
     -0.01944 |       0.00000 |      45.71411 |       0.00841 |       0.68498
     -0.02212 |       0.00000 |      23.20169 |       0.00952 |       0.68389
     -0.02536 |       0.00000 |      16.13130 |       0.01087 |       0.68256
     -0.02787 |       0.00000 |      14.43789 |       0.01186 |       0.68159
     -0.02938 |       0.00000 |      12.95727 |       0.01328 |       0.68020
     -0.02988 |       0.00000 |      11.43579 |       0.01333 |       0.68015
     -0.03024 |       0.00000 |      10.26444 |       0.01422 |       0.67928
     -0.03088 |       0.00000 |       9.51332 |       0.01402 |       0.67947
Evaluating losses...
     -0.03107 |       0.00000 |       9.24126 |       0.01376 |       0.67972
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00136 |       0.00000 |      88.68404 |      9.34e-05 |       0.69305
     -0.00625 |       0.00000 |      85.75031 |       0.00112 |       0.69204
     -0.01295 |       0.00000 |      82.38410 |       0.00476 |       0.68845
     -0.01620 |       0.00000 |      78.35806 |       0.00805 |       0.68523
     -0.01714 |       0.00000 |      73.65776 |       0.00918 |       0.68413
     -0.01821 |       0.00000 |      68.41404 |       0.00937 |       0.68394
     -0.01903 |       0.00000 |      62.83271 |       0.00897 |       0.68432
     -0.02017 |       0.00000 |      57.10818 |       0.00961 |       0.68370
     -0.02115 |       0.00000 |      51.54596 |       0.00999 |       0.68331
     -0.02242 |       0.00000 |      46.17475 |       0.01026 |       0.68304
Evaluating losses...
     -0.02318 |       0.00000 |      43.57072 |       0.00929 |       0.68399
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01078 |       0.00000 |      85.77930 |       0.00385 |       0.68956
     -0.01778 |       0.00000 |      72.07107 |       0.00814 |       0.68547
     -0.01956 |       0.00000 |      44.77187 |       0.00819 |       0.68542
     -0.02235 |       0.00000 |      22.42244 |       0.00924 |       0.68440
     -0.02508 |       0.00000 |      15.82591 |       0.01077 |       0.68290
     -0.02742 |       0.00000 |      14.30680 |       0.01214 |       0.68158
     -0.02868 |       0.00000 |      12.96957 |       0.01304 |       0.68070
     -0.02973 |       0.00000 |      11.66647 |       0.01372 |       0.68003
     -0.03018 |       0.00000 |      10.65904 |       0.01371 |       0.68004
     -0.03072 |       0.00000 |      10.03762 |       0.01410 |       0.67966
Evaluating losses...
     -0.03028 |       0.00000 |       9.80558 |       0.01656 |       0.67730
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00114 |       0.00000 |      85.31448 |      6.45e-05 |       0.69309
     -0.00499 |       0.00000 |      82.85036 |       0.00073 |       0.69242
     -0.01166 |       0.00000 |      79.82514 |       0.00342 |       0.68977
     -0.01583 |       0.00000 |      76.10680 |       0.00760 |       0.68567
     -0.01690 |       0.00000 |      71.67278 |       0.00879 |       0.68450
     -0.01747 |       0.00000 |      66.72908 |       0.00886 |       0.68443
     -0.01820 |       0.00000 |      61.45143 |       0.00865 |       0.68464
     -0.01886 |       0.00000 |      55.99997 |       0.00976 |       0.68355
     -0.02018 |       0.00000 |      50.62980 |       0.00897 |       0.68431
     -0.02122 |       0.00000 |      45.43185 |       0.00973 |       0.68356
Evaluating losses...
     -0.02159 |       0.00000 |      42.90942 |       0.01062 |       0.68269
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01054 |       0.00000 |      84.96121 |       0.00457 |       0.68873
     -0.01740 |       0.00000 |      71.27767 |       0.00823 |       0.68517
     -0.01847 |       0.00000 |      44.37114 |       0.00922 |       0.68420
     -0.02088 |       0.00000 |      22.47491 |       0.00896 |       0.68445
     -0.02399 |       0.00000 |      16.10423 |       0.01037 |       0.68306
     -0.02689 |       0.00000 |      14.74790 |       0.01204 |       0.68142
     -0.02849 |       0.00000 |      13.60727 |       0.01306 |       0.68042
     -0.02932 |       0.00000 |      12.32932 |       0.01367 |       0.67982
     -0.02977 |       0.00000 |      11.18341 |       0.01414 |       0.67936
     -0.03030 |       0.00000 |      10.32027 |       0.01406 |       0.67944
Evaluating losses...
     -0.03073 |       0.00000 |       9.97593 |       0.01364 |       0.67984
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00172 |       0.00000 |      91.16431 |       0.00013 |       0.69302
     -0.00685 |       0.00000 |      88.09100 |       0.00118 |       0.69196
     -0.01407 |       0.00000 |      84.59893 |       0.00527 |       0.68791
     -0.01695 |       0.00000 |      80.38404 |       0.00828 |       0.68494
     -0.01776 |       0.00000 |      75.49770 |       0.00792 |       0.68528
     -0.01861 |       0.00000 |      70.11081 |       0.00920 |       0.68403
     -0.01971 |       0.00000 |      64.39072 |       0.00880 |       0.68442
     -0.02096 |       0.00000 |      58.44753 |       0.00936 |       0.68386
     -0.02280 |       0.00000 |      52.44891 |       0.00955 |       0.68366
     -0.02338 |       0.00000 |      46.51954 |       0.01038 |       0.68285
Evaluating losses...
     -0.02465 |       0.00000 |      43.68196 |       0.01042 |       0.68281
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01003 |       0.00000 |      86.80737 |       0.00367 |       0.68947
     -0.01782 |       0.00000 |      73.89150 |       0.00846 |       0.68472
     -0.01926 |       0.00000 |      47.79050 |       0.00829 |       0.68488
     -0.02122 |       0.00000 |      24.37148 |       0.00849 |       0.68467
     -0.02391 |       0.00000 |      16.60320 |       0.01002 |       0.68316
     -0.02666 |       0.00000 |      14.92818 |       0.01077 |       0.68242
     -0.02814 |       0.00000 |      13.65452 |       0.01189 |       0.68131
     -0.02879 |       0.00000 |      12.32466 |       0.01270 |       0.68052
     -0.02957 |       0.00000 |      11.20051 |       0.01344 |       0.67979
     -0.03004 |       0.00000 |      10.40506 |       0.01366 |       0.67958
Evaluating losses...
     -0.03064 |       0.00000 |      10.10442 |       0.01379 |       0.67945
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00192 |       0.00000 |     100.19597 |       0.00012 |       0.69302
     -0.00719 |       0.00000 |      97.71710 |       0.00129 |       0.69187
     -0.01452 |       0.00000 |      94.74412 |       0.00489 |       0.68833
     -0.01710 |       0.00000 |      90.88320 |       0.00817 |       0.68513
     -0.01811 |       0.00000 |      86.07217 |       0.00859 |       0.68472
     -0.01853 |       0.00000 |      80.59219 |       0.00929 |       0.68404
     -0.01965 |       0.00000 |      74.51362 |       0.00884 |       0.68446
     -0.02077 |       0.00000 |      68.08072 |       0.00924 |       0.68407
     -0.02191 |       0.00000 |      61.57520 |       0.00904 |       0.68426
     -0.02327 |       0.00000 |      55.22828 |       0.00946 |       0.68384
Evaluating losses...
     -0.02399 |       0.00000 |      52.08939 |       0.00993 |       0.68339
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00952 |       0.00000 |      80.37733 |       0.00380 |       0.68935
     -0.01584 |       0.00000 |      67.03646 |       0.00812 |       0.68505
     -0.01783 |       0.00000 |      41.07569 |       0.00841 |       0.68475
     -0.02080 |       0.00000 |      20.55051 |       0.00925 |       0.68392
     -0.02364 |       0.00000 |      14.69239 |       0.01104 |       0.68215
     -0.02558 |       0.00000 |      13.30426 |       0.01239 |       0.68081
     -0.02672 |       0.00000 |      12.03395 |       0.01335 |       0.67987
     -0.02721 |       0.00000 |      10.79771 |       0.01342 |       0.67980
     -0.02747 |       0.00000 |       9.84383 |       0.01420 |       0.67904
     -0.02828 |       0.00000 |       9.21313 |       0.01432 |       0.67891
Evaluating losses...
     -0.02841 |       0.00000 |       8.97290 |       0.01315 |       0.68006
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00172 |       0.00000 |      93.96353 |      9.83e-05 |       0.69304
     -0.00683 |       0.00000 |      91.18021 |       0.00113 |       0.69199
     -0.01388 |       0.00000 |      87.96496 |       0.00481 |       0.68832
     -0.01660 |       0.00000 |      84.09916 |       0.00815 |       0.68503
     -0.01727 |       0.00000 |      79.50087 |       0.00855 |       0.68463
     -0.01785 |       0.00000 |      74.33804 |       0.00844 |       0.68474
     -0.01840 |       0.00000 |      68.80483 |       0.00900 |       0.68418
     -0.01938 |       0.00000 |      63.14201 |       0.00894 |       0.68424
     -0.02056 |       0.00000 |      57.50082 |       0.00936 |       0.68382
     -0.02152 |       0.00000 |      51.99289 |       0.00939 |       0.68378
Evaluating losses...
     -0.02240 |       0.00000 |      49.25425 |       0.01004 |       0.68314
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01006 |       0.00000 |      83.57249 |       0.00443 |       0.68878
     -0.01595 |       0.00000 |      70.84657 |       0.00812 |       0.68515
     -0.01774 |       0.00000 |      45.11484 |       0.00814 |       0.68512
     -0.01998 |       0.00000 |      23.06250 |       0.00883 |       0.68443
     -0.02298 |       0.00000 |      16.17977 |       0.00953 |       0.68374
     -0.02515 |       0.00000 |      14.46166 |       0.01098 |       0.68232
     -0.02718 |       0.00000 |      12.96764 |       0.01193 |       0.68139
     -0.02835 |       0.00000 |      11.51067 |       0.01249 |       0.68083
     -0.02914 |       0.00000 |      10.41126 |       0.01294 |       0.68039
     -0.02968 |       0.00000 |       9.71722 |       0.01327 |       0.68007
Evaluating losses...
     -0.02992 |       0.00000 |       9.47448 |       0.01205 |       0.68126
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00233 |       0.00000 |      83.86464 |       0.00015 |       0.69298
     -0.00893 |       0.00000 |      81.26073 |       0.00206 |       0.69105
     -0.01560 |       0.00000 |      78.19080 |       0.00559 |       0.68755
     -0.01841 |       0.00000 |      74.39555 |       0.00851 |       0.68466
     -0.01901 |       0.00000 |      69.82043 |       0.00964 |       0.68356
     -0.01972 |       0.00000 |      64.63410 |       0.00920 |       0.68398
     -0.02086 |       0.00000 |      59.08614 |       0.00943 |       0.68376
     -0.02224 |       0.00000 |      53.42078 |       0.00949 |       0.68368
     -0.02410 |       0.00000 |      47.85206 |       0.00985 |       0.68333
     -0.02543 |       0.00000 |      42.60376 |       0.01051 |       0.68267
Evaluating losses...
     -0.02563 |       0.00000 |      40.07147 |       0.01191 |       0.68131
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00695 |       0.00000 |      96.59986 |       0.00299 |       0.69030
     -0.01440 |       0.00000 |      82.27202 |       0.00753 |       0.68591
     -0.01661 |       0.00000 |      53.48556 |       0.00772 |       0.68571
     -0.01943 |       0.00000 |      27.43922 |       0.00856 |       0.68488
     -0.02229 |       0.00000 |      18.25175 |       0.00965 |       0.68380
     -0.02469 |       0.00000 |      16.39353 |       0.01105 |       0.68243
     -0.02619 |       0.00000 |      15.34683 |       0.01199 |       0.68151
     -0.02730 |       0.00000 |      14.17797 |       0.01255 |       0.68096
     -0.02817 |       0.00000 |      13.06109 |       0.01280 |       0.68071
     -0.02865 |       0.00000 |      12.17071 |       0.01331 |       0.68021
Evaluating losses...
     -0.02908 |       0.00000 |      11.82878 |       0.01213 |       0.68135
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00186 |       0.00000 |      87.98269 |      9.58e-05 |       0.69305
     -0.00801 |       0.00000 |      85.26841 |       0.00135 |       0.69180
     -0.01568 |       0.00000 |      82.02705 |       0.00547 |       0.68772
     -0.01813 |       0.00000 |      78.11652 |       0.00869 |       0.68456
     -0.01866 |       0.00000 |      73.56577 |       0.00899 |       0.68426
     -0.01949 |       0.00000 |      68.57825 |       0.00931 |       0.68394
     -0.02055 |       0.00000 |      63.35527 |       0.00850 |       0.68472
     -0.02171 |       0.00000 |      57.98093 |       0.00970 |       0.68355
     -0.02308 |       0.00000 |      52.67285 |       0.00975 |       0.68350
     -0.02461 |       0.00000 |      47.54073 |       0.01023 |       0.68302
Evaluating losses...
     -0.02541 |       0.00000 |      45.03322 |       0.01090 |       0.68235
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01272 |       0.00000 |      97.76014 |       0.00454 |       0.68859
     -0.01845 |       0.00000 |      83.77361 |       0.00799 |       0.68514
     -0.01985 |       0.00000 |      54.22309 |       0.00783 |       0.68529
     -0.02204 |       0.00000 |      27.54301 |       0.00863 |       0.68449
     -0.02499 |       0.00000 |      18.51421 |       0.00931 |       0.68381
     -0.02772 |       0.00000 |      16.72473 |       0.01121 |       0.68193
     -0.02962 |       0.00000 |      15.65644 |       0.01226 |       0.68090
     -0.03073 |       0.00000 |      14.44446 |       0.01303 |       0.68014
     -0.03130 |       0.00000 |      13.28132 |       0.01319 |       0.67998
     -0.03178 |       0.00000 |      12.34638 |       0.01388 |       0.67930
Evaluating losses...
     -0.03230 |       0.00000 |      11.96787 |       0.01334 |       0.67983
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00145 |       0.00000 |      84.86266 |      9.61e-05 |       0.69305
     -0.00599 |       0.00000 |      82.28469 |       0.00118 |       0.69196
     -0.01264 |       0.00000 |      79.27995 |       0.00494 |       0.68825
     -0.01541 |       0.00000 |      75.65120 |       0.00821 |       0.68504
     -0.01626 |       0.00000 |      71.39748 |       0.00893 |       0.68432
     -0.01705 |       0.00000 |      66.57960 |       0.00884 |       0.68441
     -0.01774 |       0.00000 |      61.37024 |       0.00932 |       0.68394
     -0.01896 |       0.00000 |      55.94129 |       0.00962 |       0.68364
     -0.01986 |       0.00000 |      50.52003 |       0.00965 |       0.68360
     -0.02113 |       0.00000 |      45.29208 |       0.01010 |       0.68316
Evaluating losses...
     -0.02204 |       0.00000 |      42.72930 |       0.01019 |       0.68306
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00716 |       0.00000 |      80.88441 |       0.00314 |       0.69010
     -0.01401 |       0.00000 |      67.94568 |       0.00783 |       0.68552
     -0.01581 |       0.00000 |      41.92405 |       0.00802 |       0.68532
     -0.01847 |       0.00000 |      20.97828 |       0.00895 |       0.68441
     -0.02159 |       0.00000 |      15.00598 |       0.01056 |       0.68283
     -0.02414 |       0.00000 |      13.70630 |       0.01207 |       0.68135
     -0.02570 |       0.00000 |      12.49923 |       0.01289 |       0.68054
     -0.02636 |       0.00000 |      11.16658 |       0.01343 |       0.68001
     -0.02696 |       0.00000 |      10.06217 |       0.01335 |       0.68009
     -0.02749 |       0.00000 |       9.29674 |       0.01370 |       0.67974
Evaluating losses...
     -0.02786 |       0.00000 |       9.02695 |       0.01326 |       0.68017
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00114 |       0.00000 |      86.17833 |      5.77e-05 |       0.69309
     -0.00507 |       0.00000 |      83.32222 |       0.00061 |       0.69254
     -0.01124 |       0.00000 |      79.79782 |       0.00294 |       0.69021
     -0.01618 |       0.00000 |      75.41805 |       0.00691 |       0.68630
     -0.01752 |       0.00000 |      70.32809 |       0.00921 |       0.68405
     -0.01801 |       0.00000 |      64.66235 |       0.00871 |       0.68454
     -0.01888 |       0.00000 |      58.59717 |       0.00893 |       0.68431
     -0.01997 |       0.00000 |      52.51880 |       0.00891 |       0.68433
     -0.02115 |       0.00000 |      46.68102 |       0.00965 |       0.68359
     -0.02282 |       0.00000 |      41.28949 |       0.00952 |       0.68372
Evaluating losses...
     -0.02321 |       0.00000 |      38.68190 |       0.00975 |       0.68349
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01150 |       0.00000 |      89.41138 |       0.00400 |       0.68900
     -0.01874 |       0.00000 |      76.73634 |       0.00897 |       0.68396
     -0.02018 |       0.00000 |      50.57710 |       0.00856 |       0.68437
     -0.02262 |       0.00000 |      26.18847 |       0.00905 |       0.68387
     -0.02494 |       0.00000 |      17.85469 |       0.00981 |       0.68310
     -0.02688 |       0.00000 |      16.19376 |       0.01082 |       0.68210
     -0.02862 |       0.00000 |      15.15574 |       0.01146 |       0.68146
     -0.02971 |       0.00000 |      13.99497 |       0.01221 |       0.68072
     -0.03082 |       0.00000 |      12.86701 |       0.01307 |       0.67986
     -0.03141 |       0.00000 |      11.95116 |       0.01342 |       0.67952
Evaluating losses...
     -0.03180 |       0.00000 |      11.55638 |       0.01452 |       0.67843
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00099 |       0.00000 |      91.02825 |      4.89e-05 |       0.69310
     -0.00453 |       0.00000 |      88.10783 |       0.00056 |       0.69258
     -0.01073 |       0.00000 |      84.54363 |       0.00293 |       0.69021
     -0.01477 |       0.00000 |      80.13275 |       0.00742 |       0.68579
     -0.01621 |       0.00000 |      74.94344 |       0.00854 |       0.68468
     -0.01693 |       0.00000 |      69.29430 |       0.00917 |       0.68406
     -0.01826 |       0.00000 |      63.36063 |       0.00893 |       0.68428
     -0.01931 |       0.00000 |      57.31698 |       0.00918 |       0.68404
     -0.02087 |       0.00000 |      51.42607 |       0.00983 |       0.68339
     -0.02245 |       0.00000 |      45.89394 |       0.01008 |       0.68315
Evaluating losses...
     -0.02322 |       0.00000 |      43.18209 |       0.01030 |       0.68293
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00865 |       0.00000 |      82.53620 |       0.00388 |       0.68929
     -0.01526 |       0.00000 |      69.81203 |       0.00761 |       0.68560
     -0.01681 |       0.00000 |      43.94694 |       0.00785 |       0.68536
     -0.01883 |       0.00000 |      22.08899 |       0.00883 |       0.68439
     -0.02054 |       0.00000 |      15.73664 |       0.00962 |       0.68362
     -0.02278 |       0.00000 |      14.48143 |       0.01081 |       0.68244
     -0.02389 |       0.00000 |      13.43010 |       0.01148 |       0.68178
     -0.02522 |       0.00000 |      12.30023 |       0.01287 |       0.68041
     -0.02602 |       0.00000 |      11.33615 |       0.01319 |       0.68009
     -0.02687 |       0.00000 |      10.65023 |       0.01362 |       0.67966
Evaluating losses...
     -0.02652 |       0.00000 |      10.37543 |       0.01154 |       0.68170
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00164 |       0.00000 |      83.49326 |      8.87e-05 |       0.69306
     -0.00621 |       0.00000 |      80.95441 |       0.00086 |       0.69230
     -0.01324 |       0.00000 |      77.90553 |       0.00400 |       0.68919
     -0.01653 |       0.00000 |      74.15546 |       0.00759 |       0.68568
     -0.01731 |       0.00000 |      69.73034 |       0.00894 |       0.68436
     -0.01805 |       0.00000 |      64.84048 |       0.00874 |       0.68455
     -0.01920 |       0.00000 |      59.72881 |       0.00906 |       0.68423
     -0.02066 |       0.00000 |      54.56396 |       0.00917 |       0.68411
     -0.02228 |       0.00000 |      49.48982 |       0.00985 |       0.68345
     -0.02344 |       0.00000 |      44.62377 |       0.01067 |       0.68263
Evaluating losses...
     -0.02407 |       0.00000 |      42.24941 |       0.01086 |       0.68245
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00851 |       0.00000 |      86.92313 |       0.00395 |       0.68916
     -0.01475 |       0.00000 |      73.45663 |       0.00774 |       0.68536
     -0.01649 |       0.00000 |      46.21992 |       0.00813 |       0.68497
     -0.01871 |       0.00000 |      23.37894 |       0.00843 |       0.68467
     -0.02072 |       0.00000 |      16.65288 |       0.01020 |       0.68292
     -0.02349 |       0.00000 |      15.50463 |       0.01093 |       0.68220
     -0.02508 |       0.00000 |      14.67143 |       0.01189 |       0.68124
     -0.02632 |       0.00000 |      13.71115 |       0.01297 |       0.68018
     -0.02693 |       0.00000 |      12.76725 |       0.01302 |       0.68012
     -0.02762 |       0.00000 |      11.94308 |       0.01346 |       0.67970
Evaluating losses...
     -0.02805 |       0.00000 |      11.57419 |       0.01417 |       0.67900
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00143 |       0.00000 |      85.85686 |      9.19e-05 |       0.69305
     -0.00637 |       0.00000 |      82.81683 |       0.00100 |       0.69212
     -0.01315 |       0.00000 |      79.22475 |       0.00459 |       0.68853
     -0.01683 |       0.00000 |      74.87701 |       0.00752 |       0.68564
     -0.01789 |       0.00000 |      69.87340 |       0.00919 |       0.68400
     -0.01884 |       0.00000 |      64.31127 |       0.00910 |       0.68409
     -0.01991 |       0.00000 |      58.51948 |       0.00932 |       0.68386
     -0.02110 |       0.00000 |      52.72434 |       0.00921 |       0.68397
     -0.02267 |       0.00000 |      47.04969 |       0.00972 |       0.68345
     -0.02396 |       0.00000 |      41.63604 |       0.01024 |       0.68294
Evaluating losses...
     -0.02477 |       0.00000 |      38.99641 |       0.01048 |       0.68271
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00916 |       0.00000 |      82.16679 |       0.00441 |       0.68898
     -0.01466 |       0.00000 |      68.53883 |       0.00750 |       0.68600
     -0.01594 |       0.00000 |      41.87343 |       0.00833 |       0.68520
     -0.01818 |       0.00000 |      20.86084 |       0.00874 |       0.68480
     -0.02165 |       0.00000 |      14.96911 |       0.00988 |       0.68368
     -0.02509 |       0.00000 |      13.58693 |       0.01166 |       0.68194
     -0.02692 |       0.00000 |      12.30129 |       0.01317 |       0.68047
     -0.02765 |       0.00000 |      11.01379 |       0.01346 |       0.68018
     -0.02846 |       0.00000 |      10.00237 |       0.01401 |       0.67964
     -0.02875 |       0.00000 |       9.33573 |       0.01374 |       0.67990
Evaluating losses...
     -0.02904 |       0.00000 |       9.08155 |       0.01487 |       0.67881
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00166 |       0.00000 |      82.31606 |       0.00017 |       0.69297
     -0.00726 |       0.00000 |      79.59416 |       0.00136 |       0.69176
     -0.01434 |       0.00000 |      76.41878 |       0.00492 |       0.68821
     -0.01742 |       0.00000 |      72.51272 |       0.00849 |       0.68469
     -0.01795 |       0.00000 |      67.82623 |       0.00887 |       0.68432
     -0.01878 |       0.00000 |      62.56624 |       0.00912 |       0.68407
     -0.01946 |       0.00000 |      56.82349 |       0.00934 |       0.68384
     -0.02048 |       0.00000 |      50.99678 |       0.00949 |       0.68370
     -0.02166 |       0.00000 |      45.31698 |       0.00972 |       0.68346
     -0.02296 |       0.00000 |      39.98411 |       0.01010 |       0.68309
Evaluating losses...
     -0.02336 |       0.00000 |      37.47189 |       0.01042 |       0.68276
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00703 |       0.00000 |      85.62186 |       0.00312 |       0.69004
     -0.01381 |       0.00000 |      72.39508 |       0.00774 |       0.68547
     -0.01624 |       0.00000 |      45.23688 |       0.00794 |       0.68527
     -0.01910 |       0.00000 |      22.68958 |       0.00927 |       0.68396
     -0.02210 |       0.00000 |      16.06191 |       0.01089 |       0.68237
     -0.02331 |       0.00000 |      14.62074 |       0.01095 |       0.68230
     -0.02610 |       0.00000 |      13.39974 |       0.01231 |       0.68096
     -0.02756 |       0.00000 |      12.15026 |       0.01265 |       0.68063
     -0.02768 |       0.00000 |      11.08533 |       0.01286 |       0.68043
     -0.02890 |       0.00000 |      10.32239 |       0.01330 |       0.67999
Evaluating losses...
     -0.02941 |       0.00000 |      10.01059 |       0.01281 |       0.68047
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00165 |       0.00000 |      94.18080 |       0.00010 |       0.69304
     -0.00656 |       0.00000 |      91.45717 |       0.00124 |       0.69192
     -0.01341 |       0.00000 |      87.96700 |       0.00486 |       0.68835
     -0.01528 |       0.00000 |      83.49404 |       0.00771 |       0.68556
     -0.01586 |       0.00000 |      78.07503 |       0.00783 |       0.68544
     -0.01636 |       0.00000 |      72.06103 |       0.00856 |       0.68472
     -0.01710 |       0.00000 |      65.69804 |       0.00818 |       0.68509
     -0.01802 |       0.00000 |      59.24593 |       0.00873 |       0.68455
     -0.01919 |       0.00000 |      53.00270 |       0.00914 |       0.68415
     -0.02010 |       0.00000 |      47.13163 |       0.00915 |       0.68413
Evaluating losses...
     -0.02059 |       0.00000 |      44.31667 |       0.01023 |       0.68307
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01058 |       0.00000 |      88.58672 |       0.00447 |       0.68863
     -0.01795 |       0.00000 |      75.19588 |       0.00806 |       0.68503
     -0.02031 |       0.00000 |      47.89527 |       0.00897 |       0.68413
     -0.02398 |       0.00000 |      24.47131 |       0.00918 |       0.68390
     -0.02690 |       0.00000 |      16.90529 |       0.01149 |       0.68162
     -0.02985 |       0.00000 |      15.34335 |       0.01213 |       0.68098
     -0.03118 |       0.00000 |      14.17710 |       0.01300 |       0.68013
     -0.03168 |       0.00000 |      12.93858 |       0.01359 |       0.67955
     -0.03250 |       0.00000 |      11.81848 |       0.01376 |       0.67938
     -0.03298 |       0.00000 |      10.96789 |       0.01393 |       0.67921
Evaluating losses...
     -0.03318 |       0.00000 |      10.61467 |       0.01277 |       0.68035
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00080 |       0.00000 |      95.52248 |       0.00011 |       0.69305
     -0.00317 |       0.00000 |      93.00384 |       0.00081 |       0.69235
     -0.00732 |       0.00000 |      90.04915 |       0.00211 |       0.69106
     -0.01057 |       0.00000 |      86.28699 |       0.00516 |       0.68806
     -0.01327 |       0.00000 |      81.69765 |       0.00627 |       0.68694
     -0.01463 |       0.00000 |      76.39836 |       0.00856 |       0.68469
     -0.01570 |       0.00000 |      70.53911 |       0.00822 |       0.68502
     -0.01706 |       0.00000 |      64.32352 |       0.00878 |       0.68446
     -0.01852 |       0.00000 |      58.05286 |       0.00881 |       0.68442
     -0.02036 |       0.00000 |      51.98009 |       0.00957 |       0.68367
Evaluating losses...
     -0.02105 |       0.00000 |      48.97262 |       0.01061 |       0.68266
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00885 |       0.00000 |      81.51525 |       0.00365 |       0.68966
     -0.01447 |       0.00000 |      68.35014 |       0.00842 |       0.68505
     -0.01646 |       0.00000 |      42.61192 |       0.00785 |       0.68559
     -0.01897 |       0.00000 |      22.08728 |       0.00905 |       0.68442
     -0.02227 |       0.00000 |      16.14538 |       0.00983 |       0.68365
     -0.02462 |       0.00000 |      14.81061 |       0.01138 |       0.68213
     -0.02612 |       0.00000 |      13.56570 |       0.01252 |       0.68102
     -0.02748 |       0.00000 |      12.21151 |       0.01300 |       0.68055
     -0.02788 |       0.00000 |      11.00304 |       0.01293 |       0.68061
     -0.02856 |       0.00000 |      10.18316 |       0.01393 |       0.67964
Evaluating losses...
     -0.02885 |       0.00000 |       9.87381 |       0.01438 |       0.67920
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00120 |       0.00000 |      88.14846 |      6.13e-05 |       0.69309
     -0.00443 |       0.00000 |      85.27010 |       0.00080 |       0.69234
     -0.00965 |       0.00000 |      81.76662 |       0.00313 |       0.69004
     -0.01365 |       0.00000 |      77.43726 |       0.00668 |       0.68653
     -0.01492 |       0.00000 |      72.35296 |       0.00826 |       0.68498
     -0.01619 |       0.00000 |      66.66863 |       0.00841 |       0.68482
     -0.01731 |       0.00000 |      60.67569 |       0.00905 |       0.68419
     -0.01875 |       0.00000 |      54.59451 |       0.00996 |       0.68330
     -0.02006 |       0.00000 |      48.65522 |       0.01031 |       0.68294
     -0.02074 |       0.00000 |      43.07678 |       0.01075 |       0.68252
Evaluating losses...
     -0.02154 |       0.00000 |      40.40174 |       0.01027 |       0.68298
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00951 |       0.00000 |      88.37724 |       0.00460 |       0.68839
     -0.01548 |       0.00000 |      75.87960 |       0.00770 |       0.68522
     -0.01685 |       0.00000 |      49.44958 |       0.00834 |       0.68458
     -0.01888 |       0.00000 |      25.16249 |       0.00824 |       0.68466
     -0.02170 |       0.00000 |      16.97164 |       0.00968 |       0.68324
     -0.02434 |       0.00000 |      15.34764 |       0.01080 |       0.68211
     -0.02589 |       0.00000 |      14.26207 |       0.01181 |       0.68112
     -0.02707 |       0.00000 |      13.04556 |       0.01275 |       0.68019
     -0.02745 |       0.00000 |      11.98800 |       0.01309 |       0.67986
     -0.02784 |       0.00000 |      11.21813 |       0.01332 |       0.67963
Evaluating losses...
     -0.02851 |       0.00000 |      10.91008 |       0.01229 |       0.68065
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00113 |       0.00000 |      88.37742 |      4.10e-05 |       0.69311
     -0.00498 |       0.00000 |      84.97757 |       0.00061 |       0.69254
     -0.01221 |       0.00000 |      80.96909 |       0.00351 |       0.68967
     -0.01726 |       0.00000 |      76.27531 |       0.00784 |       0.68541
     -0.01832 |       0.00000 |      70.97187 |       0.00889 |       0.68438
     -0.01962 |       0.00000 |      65.22594 |       0.00881 |       0.68445
     -0.02137 |       0.00000 |      59.18248 |       0.00917 |       0.68409
     -0.02332 |       0.00000 |      53.07147 |       0.00970 |       0.68356
     -0.02490 |       0.00000 |      47.09431 |       0.01028 |       0.68299
     -0.02656 |       0.00000 |      41.46783 |       0.01107 |       0.68221
Evaluating losses...
     -0.02681 |       0.00000 |      38.76875 |       0.01088 |       0.68239
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00978 |       0.00000 |      81.95474 |       0.00442 |       0.68873
     -0.01685 |       0.00000 |      69.18091 |       0.00799 |       0.68519
     -0.01868 |       0.00000 |      43.41829 |       0.00885 |       0.68434
     -0.02176 |       0.00000 |      21.54958 |       0.00930 |       0.68388
     -0.02453 |       0.00000 |      14.89816 |       0.01096 |       0.68224
     -0.02648 |       0.00000 |      13.36716 |       0.01184 |       0.68137
     -0.02738 |       0.00000 |      12.06706 |       0.01263 |       0.68059
     -0.02808 |       0.00000 |      10.81265 |       0.01296 |       0.68028
     -0.02817 |       0.00000 |       9.85671 |       0.01290 |       0.68033
     -0.02853 |       0.00000 |       9.25306 |       0.01323 |       0.68001
Evaluating losses...
     -0.02894 |       0.00000 |       9.02878 |       0.01421 |       0.67905
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00134 |       0.00000 |      86.91966 |      8.04e-05 |       0.69307
     -0.00554 |       0.00000 |      84.38322 |       0.00091 |       0.69226
     -0.01160 |       0.00000 |      81.45303 |       0.00415 |       0.68906
     -0.01459 |       0.00000 |      77.96881 |       0.00738 |       0.68590
     -0.01536 |       0.00000 |      73.97223 |       0.00836 |       0.68495
     -0.01580 |       0.00000 |      69.56573 |       0.00872 |       0.68459
     -0.01658 |       0.00000 |      64.87326 |       0.00886 |       0.68445
     -0.01779 |       0.00000 |      60.02093 |       0.00834 |       0.68495
     -0.01883 |       0.00000 |      55.12686 |       0.00949 |       0.68382
     -0.02029 |       0.00000 |      50.37017 |       0.00971 |       0.68360
Evaluating losses...
     -0.02104 |       0.00000 |      47.94617 |       0.00991 |       0.68340
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01053 |       0.00000 |      99.14794 |       0.00382 |       0.68939
     -0.01605 |       0.00000 |      84.44016 |       0.00797 |       0.68532
     -0.01790 |       0.00000 |      54.17299 |       0.00785 |       0.68542
     -0.02074 |       0.00000 |      27.59745 |       0.00887 |       0.68442
     -0.02403 |       0.00000 |      18.81672 |       0.01017 |       0.68314
     -0.02638 |       0.00000 |      16.94233 |       0.01153 |       0.68179
     -0.02839 |       0.00000 |      15.68450 |       0.01253 |       0.68081
     -0.02964 |       0.00000 |      14.31844 |       0.01286 |       0.68049
     -0.03027 |       0.00000 |      13.11137 |       0.01304 |       0.68031
     -0.03039 |       0.00000 |      12.23154 |       0.01345 |       0.67990
Evaluating losses...
     -0.03115 |       0.00000 |      11.87796 |       0.01290 |       0.68043
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00156 |       0.00000 |      91.65997 |      9.21e-05 |       0.69306
     -0.00564 |       0.00000 |      88.60086 |       0.00110 |       0.69207
     -0.01170 |       0.00000 |      85.19992 |       0.00412 |       0.68910
     -0.01453 |       0.00000 |      81.16812 |       0.00803 |       0.68527
     -0.01526 |       0.00000 |      76.37943 |       0.00875 |       0.68457
     -0.01560 |       0.00000 |      70.88388 |       0.00893 |       0.68440
     -0.01598 |       0.00000 |      64.90153 |       0.00887 |       0.68446
     -0.01661 |       0.00000 |      58.70436 |       0.00905 |       0.68428
     -0.01761 |       0.00000 |      52.56772 |       0.00937 |       0.68396
     -0.01856 |       0.00000 |      46.60499 |       0.00914 |       0.68418
Evaluating losses...
     -0.01924 |       0.00000 |      43.72079 |       0.01002 |       0.68331
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01229 |       0.00000 |      88.76473 |       0.00510 |       0.68802
     -0.01854 |       0.00000 |      75.82546 |       0.00745 |       0.68567
     -0.02105 |       0.00000 |      49.17121 |       0.00823 |       0.68489
     -0.02422 |       0.00000 |      24.79069 |       0.00952 |       0.68361
     -0.02674 |       0.00000 |      16.31375 |       0.01092 |       0.68223
     -0.02840 |       0.00000 |      14.42553 |       0.01212 |       0.68105
     -0.02924 |       0.00000 |      13.13530 |       0.01261 |       0.68056
     -0.02954 |       0.00000 |      11.89845 |       0.01318 |       0.68000
     -0.03084 |       0.00000 |      10.92346 |       0.01323 |       0.67995
     -0.03124 |       0.00000 |      10.28448 |       0.01373 |       0.67946
Evaluating losses...
     -0.03096 |       0.00000 |      10.05998 |       0.01141 |       0.68174
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00139 |       0.00000 |      85.71873 |      6.65e-05 |       0.69307
     -0.00597 |       0.00000 |      83.18859 |       0.00099 |       0.69212
     -0.01239 |       0.00000 |      80.20806 |       0.00463 |       0.68848
     -0.01594 |       0.00000 |      76.65813 |       0.00787 |       0.68528
     -0.01634 |       0.00000 |      72.52435 |       0.00891 |       0.68426
     -0.01685 |       0.00000 |      67.91741 |       0.00880 |       0.68437
     -0.01749 |       0.00000 |      62.97832 |       0.00898 |       0.68419
     -0.01802 |       0.00000 |      57.87613 |       0.00879 |       0.68437
     -0.01864 |       0.00000 |      52.60880 |       0.00933 |       0.68384
     -0.01952 |       0.00000 |      47.30793 |       0.00925 |       0.68391
Evaluating losses...
     -0.01994 |       0.00000 |      44.67251 |       0.00936 |       0.68380
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01029 |       0.00000 |      89.96948 |       0.00430 |       0.68895
     -0.01694 |       0.00000 |      76.27279 |       0.00844 |       0.68490
     -0.01832 |       0.00000 |      48.80229 |       0.00845 |       0.68488
     -0.02043 |       0.00000 |      25.01520 |       0.00896 |       0.68438
     -0.02288 |       0.00000 |      17.32790 |       0.00953 |       0.68382
     -0.02546 |       0.00000 |      15.77546 |       0.01085 |       0.68251
     -0.02742 |       0.00000 |      14.67733 |       0.01191 |       0.68147
     -0.02835 |       0.00000 |      13.45149 |       0.01276 |       0.68064
     -0.02942 |       0.00000 |      12.31230 |       0.01337 |       0.68005
     -0.03030 |       0.00000 |      11.41540 |       0.01331 |       0.68010
Evaluating losses...
     -0.03074 |       0.00000 |      11.05714 |       0.01310 |       0.68030
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00118 |       0.00000 |      86.13819 |      5.53e-05 |       0.69309
     -0.00552 |       0.00000 |      83.20480 |       0.00075 |       0.69241
     -0.01201 |       0.00000 |      79.74085 |       0.00384 |       0.68936
     -0.01658 |       0.00000 |      75.62206 |       0.00738 |       0.68589
     -0.01839 |       0.00000 |      70.86527 |       0.00845 |       0.68485
     -0.01940 |       0.00000 |      65.63837 |       0.00939 |       0.68392
     -0.02057 |       0.00000 |      60.11790 |       0.00934 |       0.68397
     -0.02189 |       0.00000 |      54.41346 |       0.00952 |       0.68378
     -0.02321 |       0.00000 |      48.72485 |       0.01022 |       0.68309
     -0.02440 |       0.00000 |      43.22836 |       0.01043 |       0.68288
Evaluating losses...
     -0.02505 |       0.00000 |      40.60149 |       0.01085 |       0.68246
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00888 |       0.00000 |      93.20151 |       0.00308 |       0.69013
     -0.01650 |       0.00000 |      79.71393 |       0.00821 |       0.68511
     -0.01825 |       0.00000 |      51.62613 |       0.00840 |       0.68491
     -0.02094 |       0.00000 |      26.13157 |       0.00844 |       0.68486
     -0.02441 |       0.00000 |      17.34620 |       0.01028 |       0.68304
     -0.02709 |       0.00000 |      15.50901 |       0.01158 |       0.68177
     -0.02860 |       0.00000 |      14.29455 |       0.01263 |       0.68074
     -0.02976 |       0.00000 |      12.97363 |       0.01291 |       0.68045
     -0.03029 |       0.00000 |      11.84775 |       0.01351 |       0.67986
     -0.03108 |       0.00000 |      11.06252 |       0.01373 |       0.67964
Evaluating losses...
     -0.03155 |       0.00000 |      10.78165 |       0.01401 |       0.67938
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00170 |       0.00000 |      88.49669 |       0.00011 |       0.69303
     -0.00705 |       0.00000 |      85.17561 |       0.00117 |       0.69198
     -0.01437 |       0.00000 |      81.27044 |       0.00471 |       0.68848
     -0.01693 |       0.00000 |      76.64713 |       0.00838 |       0.68489
     -0.01749 |       0.00000 |      71.35281 |       0.00896 |       0.68432
     -0.01783 |       0.00000 |      65.54037 |       0.00896 |       0.68432
     -0.01855 |       0.00000 |      59.46485 |       0.00862 |       0.68465
     -0.01914 |       0.00000 |      53.44049 |       0.00928 |       0.68400
     -0.02018 |       0.00000 |      47.62350 |       0.00961 |       0.68368
     -0.02114 |       0.00000 |      42.09818 |       0.00914 |       0.68413
Evaluating losses...
     -0.02130 |       0.00000 |      39.41859 |       0.01107 |       0.68225
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00903 |       0.00000 |      85.36678 |       0.00365 |       0.68944
     -0.01550 |       0.00000 |      71.73782 |       0.00764 |       0.68544
     -0.01766 |       0.00000 |      44.95382 |       0.00845 |       0.68463
     -0.02090 |       0.00000 |      22.74886 |       0.00939 |       0.68369
     -0.02374 |       0.00000 |      16.10691 |       0.01072 |       0.68238
     -0.02571 |       0.00000 |      14.76420 |       0.01186 |       0.68125
     -0.02692 |       0.00000 |      13.73971 |       0.01267 |       0.68045
     -0.02765 |       0.00000 |      12.60523 |       0.01286 |       0.68026
     -0.02785 |       0.00000 |      11.54000 |       0.01357 |       0.67956
     -0.02836 |       0.00000 |      10.69490 |       0.01344 |       0.67969
Evaluating losses...
     -0.02877 |       0.00000 |      10.34423 |       0.01324 |       0.67989
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00181 |       0.00000 |      80.52506 |       0.00013 |       0.69302
     -0.00693 |       0.00000 |      77.70776 |       0.00128 |       0.69187
     -0.01396 |       0.00000 |      74.30500 |       0.00497 |       0.68822
     -0.01679 |       0.00000 |      70.27742 |       0.00812 |       0.68513
     -0.01747 |       0.00000 |      65.70355 |       0.00918 |       0.68409
     -0.01822 |       0.00000 |      60.68586 |       0.00874 |       0.68452
     -0.01921 |       0.00000 |      55.38597 |       0.00959 |       0.68368
     -0.02044 |       0.00000 |      49.95122 |       0.01002 |       0.68325
     -0.02174 |       0.00000 |      44.65968 |       0.00977 |       0.68350
     -0.02304 |       0.00000 |      39.68333 |       0.01080 |       0.68248
Evaluating losses...
     -0.02377 |       0.00000 |      37.26538 |       0.01069 |       0.68259
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00792 |       0.00000 |      80.93052 |       0.00382 |       0.68949
     -0.01356 |       0.00000 |      68.15624 |       0.00764 |       0.68578
     -0.01572 |       0.00000 |      42.49289 |       0.00791 |       0.68551
     -0.01880 |       0.00000 |      21.27555 |       0.00941 |       0.68404
     -0.02232 |       0.00000 |      14.99894 |       0.01044 |       0.68302
     -0.02536 |       0.00000 |      13.49340 |       0.01221 |       0.68130
     -0.02707 |       0.00000 |      12.09609 |       0.01293 |       0.68059
     -0.02792 |       0.00000 |      10.69681 |       0.01304 |       0.68047
     -0.02840 |       0.00000 |       9.61255 |       0.01320 |       0.68031
     -0.02827 |       0.00000 |       8.91656 |       0.01407 |       0.67947
Evaluating losses...
     -0.02921 |       0.00000 |       8.66287 |       0.01282 |       0.68067
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00080 |       0.00000 |      93.90290 |      6.92e-05 |       0.69308
     -0.00365 |       0.00000 |      91.02256 |       0.00065 |       0.69250
     -0.00808 |       0.00000 |      87.56416 |       0.00233 |       0.69084
     -0.01204 |       0.00000 |      83.27555 |       0.00566 |       0.68758
     -0.01401 |       0.00000 |      78.17673 |       0.00737 |       0.68591
     -0.01443 |       0.00000 |      72.52226 |       0.00837 |       0.68492
     -0.01527 |       0.00000 |      66.42088 |       0.00817 |       0.68512
     -0.01599 |       0.00000 |      60.14690 |       0.00876 |       0.68454
     -0.01681 |       0.00000 |      54.02719 |       0.00863 |       0.68466
     -0.01784 |       0.00000 |      48.20129 |       0.00862 |       0.68467
Evaluating losses...
     -0.01841 |       0.00000 |      45.39780 |       0.00940 |       0.68390
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00762 |       0.00000 |      84.30891 |       0.00354 |       0.68953
     -0.01414 |       0.00000 |      71.56667 |       0.00786 |       0.68517
     -0.01558 |       0.00000 |      45.50863 |       0.00847 |       0.68456
     -0.01741 |       0.00000 |      22.97981 |       0.00861 |       0.68442
     -0.01988 |       0.00000 |      16.16081 |       0.00970 |       0.68333
     -0.02213 |       0.00000 |      14.80447 |       0.01116 |       0.68189
     -0.02359 |       0.00000 |      13.72684 |       0.01204 |       0.68101
     -0.02511 |       0.00000 |      12.48843 |       0.01276 |       0.68030
     -0.02583 |       0.00000 |      11.32491 |       0.01335 |       0.67971
     -0.02666 |       0.00000 |      10.44740 |       0.01395 |       0.67912
Evaluating losses...
     -0.02733 |       0.00000 |      10.10878 |       0.01330 |       0.67976
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00092 |       0.00000 |      87.42481 |      8.71e-05 |       0.69306
     -0.00373 |       0.00000 |      84.44479 |       0.00065 |       0.69249
     -0.00838 |       0.00000 |      80.88687 |       0.00280 |       0.69035
     -0.01213 |       0.00000 |      76.50471 |       0.00528 |       0.68791
     -0.01376 |       0.00000 |      71.18809 |       0.00785 |       0.68538
     -0.01452 |       0.00000 |      65.19778 |       0.00792 |       0.68532
     -0.01523 |       0.00000 |      58.96718 |       0.00862 |       0.68463
     -0.01634 |       0.00000 |      52.76704 |       0.00848 |       0.68476
     -0.01732 |       0.00000 |      46.80470 |       0.00862 |       0.68462
     -0.01892 |       0.00000 |      41.21956 |       0.00936 |       0.68389
Evaluating losses...
     -0.01986 |       0.00000 |      38.50515 |       0.00964 |       0.68361
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01115 |       0.00000 |      85.10641 |       0.00512 |       0.68827
     -0.01694 |       0.00000 |      72.37508 |       0.00804 |       0.68546
     -0.01869 |       0.00000 |      46.27902 |       0.00826 |       0.68524
     -0.02149 |       0.00000 |      23.51334 |       0.00879 |       0.68470
     -0.02407 |       0.00000 |      16.42047 |       0.01008 |       0.68345
     -0.02598 |       0.00000 |      14.94346 |       0.01122 |       0.68233
     -0.02683 |       0.00000 |      13.73331 |       0.01212 |       0.68146
     -0.02835 |       0.00000 |      12.49511 |       0.01257 |       0.68102
     -0.02878 |       0.00000 |      11.50487 |       0.01329 |       0.68031
     -0.02915 |       0.00000 |      10.80108 |       0.01323 |       0.68037
Evaluating losses...
     -0.02979 |       0.00000 |      10.53389 |       0.01459 |       0.67905
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00098 |       0.00000 |      85.51484 |      5.64e-05 |       0.69310
     -0.00417 |       0.00000 |      82.86191 |       0.00065 |       0.69251
     -0.00915 |       0.00000 |      79.76768 |       0.00270 |       0.69048
     -0.01341 |       0.00000 |      75.91510 |       0.00725 |       0.68599
     -0.01481 |       0.00000 |      71.30453 |       0.00837 |       0.68490
     -0.01581 |       0.00000 |      66.10905 |       0.00882 |       0.68445
     -0.01775 |       0.00000 |      60.61078 |       0.00915 |       0.68412
     -0.01902 |       0.00000 |      55.02247 |       0.00988 |       0.68338
     -0.02054 |       0.00000 |      49.48318 |       0.01056 |       0.68272
     -0.02138 |       0.00000 |      44.17678 |       0.01075 |       0.68253
Evaluating losses...
     -0.02227 |       0.00000 |      41.58756 |       0.01160 |       0.68171
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00856 |       0.00000 |      86.45615 |       0.00396 |       0.68923
     -0.01542 |       0.00000 |      73.85293 |       0.00792 |       0.68531
     -0.01686 |       0.00000 |      47.92786 |       0.00833 |       0.68490
     -0.01956 |       0.00000 |      24.57350 |       0.00868 |       0.68455
     -0.02312 |       0.00000 |      16.71875 |       0.01004 |       0.68320
     -0.02579 |       0.00000 |      14.99251 |       0.01174 |       0.68153
     -0.02747 |       0.00000 |      13.81841 |       0.01247 |       0.68081
     -0.02857 |       0.00000 |      12.59490 |       0.01306 |       0.68023
     -0.02947 |       0.00000 |      11.57526 |       0.01328 |       0.68001
     -0.02974 |       0.00000 |      10.86315 |       0.01389 |       0.67942
Evaluating losses...
     -0.03002 |       0.00000 |      10.58837 |       0.01491 |       0.67841
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00096 |       0.00000 |      87.25069 |      5.38e-05 |       0.69309
     -0.00481 |       0.00000 |      84.48468 |       0.00081 |       0.69234
     -0.01058 |       0.00000 |      80.97482 |       0.00340 |       0.68978
     -0.01480 |       0.00000 |      76.56532 |       0.00627 |       0.68694
     -0.01670 |       0.00000 |      71.29456 |       0.00878 |       0.68446
     -0.01725 |       0.00000 |      65.48101 |       0.00906 |       0.68417
     -0.01803 |       0.00000 |      59.33017 |       0.00888 |       0.68434
     -0.01907 |       0.00000 |      53.15951 |       0.00935 |       0.68388
     -0.02039 |       0.00000 |      47.16577 |       0.00944 |       0.68379
     -0.02182 |       0.00000 |      41.51397 |       0.00968 |       0.68354
Evaluating losses...
     -0.02260 |       0.00000 |      38.82440 |       0.01016 |       0.68308
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00793 |       0.00000 |      86.57827 |       0.00277 |       0.69041
     -0.01454 |       0.00000 |      73.70017 |       0.00808 |       0.68517
     -0.01677 |       0.00000 |      47.22330 |       0.00818 |       0.68507
     -0.01971 |       0.00000 |      23.93271 |       0.00899 |       0.68427
     -0.02256 |       0.00000 |      16.42988 |       0.01029 |       0.68298
     -0.02488 |       0.00000 |      15.03283 |       0.01179 |       0.68151
     -0.02654 |       0.00000 |      14.04237 |       0.01256 |       0.68075
     -0.02750 |       0.00000 |      12.94136 |       0.01298 |       0.68034
     -0.02813 |       0.00000 |      11.87020 |       0.01365 |       0.67968
     -0.02898 |       0.00000 |      11.02006 |       0.01353 |       0.67980
Evaluating losses...
     -0.02931 |       0.00000 |      10.64828 |       0.01389 |       0.67944
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00112 |       0.00000 |      88.13337 |      8.17e-05 |       0.69306
     -0.00455 |       0.00000 |      85.64677 |       0.00056 |       0.69259
     -0.01040 |       0.00000 |      82.48499 |       0.00279 |       0.69036
     -0.01502 |       0.00000 |      78.42643 |       0.00652 |       0.68669
     -0.01607 |       0.00000 |      73.57141 |       0.00841 |       0.68483
     -0.01659 |       0.00000 |      68.05338 |       0.00907 |       0.68419
     -0.01765 |       0.00000 |      62.11687 |       0.00855 |       0.68468
     -0.01837 |       0.00000 |      55.98840 |       0.00898 |       0.68427
     -0.01994 |       0.00000 |      50.01419 |       0.00961 |       0.68364
     -0.02107 |       0.00000 |      44.33762 |       0.00970 |       0.68355
Evaluating losses...
     -0.02219 |       0.00000 |      41.58295 |       0.00959 |       0.68365
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01019 |       0.00000 |      79.55873 |       0.00440 |       0.68873
     -0.01777 |       0.00000 |      67.19308 |       0.00818 |       0.68494
     -0.01992 |       0.00000 |      42.45264 |       0.00830 |       0.68482
     -0.02264 |       0.00000 |      21.49299 |       0.00954 |       0.68359
     -0.02527 |       0.00000 |      15.11649 |       0.01087 |       0.68228
     -0.02716 |       0.00000 |      13.69893 |       0.01180 |       0.68136
     -0.02854 |       0.00000 |      12.51734 |       0.01249 |       0.68068
     -0.02929 |       0.00000 |      11.30108 |       0.01336 |       0.67982
     -0.03000 |       0.00000 |      10.29088 |       0.01370 |       0.67948
     -0.03017 |       0.00000 |       9.58786 |       0.01388 |       0.67931
Evaluating losses...
     -0.03081 |       0.00000 |       9.30420 |       0.01405 |       0.67914
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00203 |       0.00000 |      91.76107 |       0.00011 |       0.69303
     -0.00887 |       0.00000 |      88.73766 |       0.00138 |       0.69175
     -0.01660 |       0.00000 |      85.14053 |       0.00613 |       0.68705
     -0.01902 |       0.00000 |      80.68127 |       0.00753 |       0.68565
     -0.01986 |       0.00000 |      75.44258 |       0.00867 |       0.68454
     -0.02083 |       0.00000 |      69.68076 |       0.00877 |       0.68444
     -0.02189 |       0.00000 |      63.68810 |       0.00895 |       0.68425
     -0.02360 |       0.00000 |      57.66762 |       0.00964 |       0.68357
     -0.02466 |       0.00000 |      51.69366 |       0.00988 |       0.68332
     -0.02633 |       0.00000 |      45.94888 |       0.01038 |       0.68283
Evaluating losses...
     -0.02700 |       0.00000 |      43.15401 |       0.01068 |       0.68253
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00893 |       0.00000 |      91.41645 |       0.00382 |       0.68942
     -0.01565 |       0.00000 |      77.49877 |       0.00799 |       0.68535
     -0.01779 |       0.00000 |      49.12630 |       0.00816 |       0.68517
     -0.02016 |       0.00000 |      24.87933 |       0.00890 |       0.68443
     -0.02314 |       0.00000 |      17.36728 |       0.00985 |       0.68350
     -0.02548 |       0.00000 |      15.85675 |       0.01142 |       0.68196
     -0.02698 |       0.00000 |      14.77089 |       0.01219 |       0.68121
     -0.02826 |       0.00000 |      13.61048 |       0.01261 |       0.68080
     -0.02910 |       0.00000 |      12.52509 |       0.01305 |       0.68036
     -0.02982 |       0.00000 |      11.69906 |       0.01324 |       0.68018
Evaluating losses...
     -0.03031 |       0.00000 |      11.36911 |       0.01392 |       0.67951
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00128 |       0.00000 |      89.62630 |      9.41e-05 |       0.69305
     -0.00501 |       0.00000 |      86.85034 |       0.00080 |       0.69235
     -0.01088 |       0.00000 |      83.30903 |       0.00328 |       0.68990
     -0.01527 |       0.00000 |      78.83947 |       0.00694 |       0.68630
     -0.01626 |       0.00000 |      73.53564 |       0.00850 |       0.68477
     -0.01696 |       0.00000 |      67.72350 |       0.00925 |       0.68404
     -0.01831 |       0.00000 |      61.63464 |       0.00904 |       0.68424
     -0.01918 |       0.00000 |      55.51136 |       0.00905 |       0.68421
     -0.02084 |       0.00000 |      49.60777 |       0.00914 |       0.68412
     -0.02186 |       0.00000 |      44.00003 |       0.00987 |       0.68341
Evaluating losses...
     -0.02280 |       0.00000 |      41.28654 |       0.01064 |       0.68264
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00904 |       0.00000 |      89.81337 |       0.00391 |       0.68921
     -0.01482 |       0.00000 |      75.76361 |       0.00804 |       0.68508
     -0.01657 |       0.00000 |      47.72309 |       0.00832 |       0.68481
     -0.01885 |       0.00000 |      24.51720 |       0.00848 |       0.68463
     -0.02140 |       0.00000 |      17.46414 |       0.00965 |       0.68347
     -0.02349 |       0.00000 |      16.12253 |       0.01093 |       0.68220
     -0.02484 |       0.00000 |      15.12794 |       0.01223 |       0.68092
     -0.02557 |       0.00000 |      13.96011 |       0.01276 |       0.68041
     -0.02673 |       0.00000 |      12.83259 |       0.01320 |       0.67997
     -0.02725 |       0.00000 |      11.95101 |       0.01366 |       0.67952
Evaluating losses...
     -0.02777 |       0.00000 |      11.58904 |       0.01388 |       0.67930
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00185 |       0.00000 |      83.55835 |       0.00011 |       0.69305
     -0.00778 |       0.00000 |      80.44443 |       0.00129 |       0.69188
     -0.01484 |       0.00000 |      76.76054 |       0.00499 |       0.68823
     -0.01794 |       0.00000 |      72.43085 |       0.00779 |       0.68550
     -0.01886 |       0.00000 |      67.56656 |       0.00904 |       0.68427
     -0.01990 |       0.00000 |      62.32152 |       0.00860 |       0.68469
     -0.02106 |       0.00000 |      56.84011 |       0.00945 |       0.68385
     -0.02269 |       0.00000 |      51.32582 |       0.00928 |       0.68402
     -0.02382 |       0.00000 |      46.01056 |       0.01035 |       0.68297
     -0.02531 |       0.00000 |      41.00217 |       0.01090 |       0.68242
Evaluating losses...
     -0.02599 |       0.00000 |      38.60733 |       0.01144 |       0.68189
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01243 |       0.00000 |      80.45092 |       0.00455 |       0.68869
     -0.01945 |       0.00000 |      67.43520 |       0.00860 |       0.68472
     -0.02076 |       0.00000 |      41.82167 |       0.00852 |       0.68479
     -0.02366 |       0.00000 |      20.95425 |       0.00916 |       0.68415
     -0.02744 |       0.00000 |      14.92234 |       0.01053 |       0.68280
     -0.03036 |       0.00000 |      13.49038 |       0.01261 |       0.68076
     -0.03209 |       0.00000 |      12.16936 |       0.01356 |       0.67983
     -0.03288 |       0.00000 |      10.78523 |       0.01404 |       0.67935
     -0.03331 |       0.00000 |       9.63813 |       0.01454 |       0.67886
     -0.03356 |       0.00000 |       8.87236 |       0.01528 |       0.67814
Evaluating losses...
     -0.03424 |       0.00000 |       8.59003 |       0.01475 |       0.67865
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00173 |       0.00000 |      93.38361 |       0.00012 |       0.69302
     -0.00668 |       0.00000 |      90.41450 |       0.00122 |       0.69191
     -0.01343 |       0.00000 |      86.90123 |       0.00452 |       0.68862
     -0.01638 |       0.00000 |      82.63811 |       0.00736 |       0.68582
     -0.01721 |       0.00000 |      77.71511 |       0.00845 |       0.68475
     -0.01788 |       0.00000 |      72.36057 |       0.00912 |       0.68409
     -0.01876 |       0.00000 |      66.71043 |       0.00860 |       0.68459
     -0.01995 |       0.00000 |      60.89065 |       0.00888 |       0.68430
     -0.02144 |       0.00000 |      55.10371 |       0.00925 |       0.68394
     -0.02278 |       0.00000 |      49.40883 |       0.00972 |       0.68347
Evaluating losses...
     -0.02322 |       0.00000 |      46.58011 |       0.01017 |       0.68302
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00863 |       0.00000 |      85.18339 |       0.00346 |       0.68983
     -0.01536 |       0.00000 |      71.40862 |       0.00882 |       0.68464
     -0.01737 |       0.00000 |      44.13279 |       0.00795 |       0.68546
     -0.02062 |       0.00000 |      22.43903 |       0.00917 |       0.68427
     -0.02326 |       0.00000 |      16.29219 |       0.01068 |       0.68279
     -0.02563 |       0.00000 |      14.88148 |       0.01118 |       0.68230
     -0.02715 |       0.00000 |      13.58574 |       0.01293 |       0.68060
     -0.02879 |       0.00000 |      12.19777 |       0.01307 |       0.68045
     -0.02939 |       0.00000 |      11.05283 |       0.01341 |       0.68012
     -0.02987 |       0.00000 |      10.25636 |       0.01345 |       0.68007
Evaluating losses...
     -0.03037 |       0.00000 |       9.96856 |       0.01500 |       0.67857
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00125 |       0.00000 |      83.90713 |      5.71e-05 |       0.69309
     -0.00650 |       0.00000 |      81.42889 |       0.00100 |       0.69214
     -0.01457 |       0.00000 |      78.44581 |       0.00469 |       0.68848
     -0.01722 |       0.00000 |      74.65028 |       0.00846 |       0.68477
     -0.01814 |       0.00000 |      69.98773 |       0.00880 |       0.68443
     -0.01883 |       0.00000 |      64.63139 |       0.00912 |       0.68411
     -0.01964 |       0.00000 |      58.81000 |       0.00930 |       0.68394
     -0.02058 |       0.00000 |      52.75383 |       0.00931 |       0.68392
     -0.02160 |       0.00000 |      46.73983 |       0.00942 |       0.68380
     -0.02321 |       0.00000 |      41.04107 |       0.00968 |       0.68354
Evaluating losses...
     -0.02404 |       0.00000 |      38.27923 |       0.01077 |       0.68247
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.01221 |       0.00000 |      91.25227 |       0.00454 |       0.68877
     -0.01819 |       0.00000 |      78.52476 |       0.00819 |       0.68523
     -0.01975 |       0.00000 |      51.70109 |       0.00815 |       0.68528
     -0.02219 |       0.00000 |      26.50723 |       0.00886 |       0.68457
     -0.02508 |       0.00000 |      17.98225 |       0.00966 |       0.68379
     -0.02697 |       0.00000 |      16.28886 |       0.01154 |       0.68196
     -0.02873 |       0.00000 |      15.21001 |       0.01209 |       0.68142
     -0.02948 |       0.00000 |      14.02462 |       0.01322 |       0.68031
     -0.03038 |       0.00000 |      12.90974 |       0.01336 |       0.68017
     -0.03061 |       0.00000 |      12.03411 |       0.01397 |       0.67957
Evaluating losses...
     -0.03099 |       0.00000 |      11.65387 |       0.01273 |       0.68077
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00160 |       0.00000 |      83.75403 |       0.00017 |       0.69298
     -0.00666 |       0.00000 |      80.94183 |       0.00212 |       0.69104
     -0.01119 |       0.00000 |      77.86915 |       0.00500 |       0.68821
     -0.01466 |       0.00000 |      74.15305 |       0.00636 |       0.68686
     -0.01756 |       0.00000 |      69.73478 |       0.00834 |       0.68491
     -0.01864 |       0.00000 |      64.78265 |       0.00940 |       0.68387
     -0.01989 |       0.00000 |      59.52032 |       0.00931 |       0.68395
     -0.02097 |       0.00000 |      54.04965 |       0.00948 |       0.68379
     -0.02208 |       0.00000 |      48.68857 |       0.01029 |       0.68299
     -0.02362 |       0.00000 |      43.51255 |       0.01068 |       0.68260
Evaluating losses...
     -0.02396 |       0.00000 |      41.03507 |       0.01060 |       0.68267
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00910 |       0.00000 |      93.43211 |       0.00403 |       0.68903
     -0.01557 |       0.00000 |      79.82188 |       0.00767 |       0.68535
     -0.01689 |       0.00000 |      51.85278 |       0.00791 |       0.68511
     -0.01915 |       0.00000 |      26.54080 |       0.00852 |       0.68450
     -0.02218 |       0.00000 |      18.01200 |       0.00939 |       0.68363
     -0.02459 |       0.00000 |      16.29358 |       0.01075 |       0.68228
     -0.02639 |       0.00000 |      15.16594 |       0.01215 |       0.68090
     -0.02806 |       0.00000 |      13.92322 |       0.01273 |       0.68032
     -0.02870 |       0.00000 |      12.81259 |       0.01323 |       0.67983
     -0.02950 |       0.00000 |      12.00123 |       0.01352 |       0.67955
Evaluating losses...
     -0.02929 |       0.00000 |      11.66571 |       0.01159 |       0.68145
-----------------------------

  "{} != {}".format(self.training_env, self.eval_env))


Optimizing...
     pol_surr |    pol_entpen |       vf_loss |            kl |           ent
     -0.00178 |       0.00000 |      92.79841 |       0.00018 |       0.69297
     -0.00709 |       0.00000 |      90.34549 |       0.00149 |       0.69167
     -0.01435 |       0.00000 |      87.34417 |       0.00540 |       0.68781
     -0.01657 |       0.00000 |      83.58577 |       0.00800 |       0.68526
     -0.01696 |       0.00000 |      79.13976 |       0.00896 |       0.68432
     -0.01777 |       0.00000 |      74.19498 |       0.00907 |       0.68422
     -0.01868 |       0.00000 |      68.83524 |       0.00895 |       0.68433
     -0.02013 |       0.00000 |      63.23062 |       0.00956 |       0.68373
     -0.02147 |       0.00000 |      57.58737 |       0.00981 |       0.68348
     -0.02266 |       0.00000 |      51.98813 |       0.01021 |       0.68307
Evaluating losses...
     -0.02332 |       0.00000 |      49.20932 |       0.01067 |       0.68263
-----------------------------

In [22]:
df.head()

Unnamed: 0,Seed,BNN reward,BNN std,DNN reward,DNN std
0,600.0,189.033,19.244737,181.186,30.742241
1,600.0,174.573,38.30818,173.827,39.677715
2,601.0,158.712,54.098254,174.281,37.204463
3,602.0,173.205,37.427329,183.545,26.022298
4,603.0,178.991,33.860403,167.562,44.853006


In [23]:
#export dataframe as csv so you dont have to run experiment again
df.to_csv(r'BNNCartpoleExperiment1.csv', index = False)

# Experiment Analysys

In [None]:
#load experiment data from csv
