In [1]:
import gym

from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3 import mSAC
from stable_baselines3.common.evaluation import evaluate_policy, evaluate_meta_policy

import numpy as np
import torch as th
from gym import spaces

##pyfly stuff
from pyfly.pyfly import PyFly
from pyfly.pid_controller import PIDController
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as p3
import matplotlib.animation as animation

In [2]:
th.manual_seed(42)
np.random.seed(666)

In [3]:
import numpy as np
from gym.envs.mujoco import HalfCheetahEnv as HalfCheetahEnv_

class HalfCheetahEnv(HalfCheetahEnv_):
    def _get_obs(self):
        return np.concatenate([
            self.sim.data.qpos.flat[1:],
            self.sim.data.qvel.flat,
            self.get_body_com("torso").flat,
        ]).astype(np.float32).flatten()

    def viewer_setup(self):
        camera_id = self.model.camera_name2id('track')
        self.viewer.cam.type = 2
        self.viewer.cam.fixedcamid = camera_id
        self.viewer.cam.distance = self.model.stat.extent * 0.35
        # Hide the overlay
        self.viewer._hide_overlay = True

    def render(self, mode='human'):
        if mode == 'rgb_array':
            self._get_viewer().render()
            # window size used for old mujoco-py:
            width, height = 500, 500
            data = self._get_viewer().read_pixels(width, height, depth=False)
            return data
        elif mode == 'human':
            self._get_viewer().render()
            
import numpy as np



class HalfCheetahVelEnv(HalfCheetahEnv):
    """Half-cheetah environment with target velocity, as described in [1]. The
    code is adapted from
    https://github.com/cbfinn/maml_rl/blob/9c8e2ebd741cb0c7b8bf2d040c4caeeb8e06cc95/rllab/envs/mujoco/half_cheetah_env_rand.py
    The half-cheetah follows the dynamics from MuJoCo [2], and receives at each
    time step a reward composed of a control cost and a penalty equal to the
    difference between its current velocity and the target velocity. The tasks
    are generated by sampling the target velocities from the uniform
    distribution on [0, 2].
    [1] Chelsea Finn, Pieter Abbeel, Sergey Levine, "Model-Agnostic
        Meta-Learning for Fast Adaptation of Deep Networks", 2017
        (https://arxiv.org/abs/1703.03400)
    [2] Emanuel Todorov, Tom Erez, Yuval Tassa, "MuJoCo: A physics engine for
        model-based control", 2012
        (https://homes.cs.washington.edu/~todorov/papers/TodorovIROS12.pdf)
    """
    def __init__(self, task={}, n_tasks=30000, randomize_tasks=True):
        self._task = task
        self.tasks = self.sample_tasks(n_tasks)
        self._goal_vel = self.tasks[0].get('velocity', 0.0)
        self._goal = self._goal_vel
        self.i = 0
        super(HalfCheetahVelEnv, self).__init__()

    def step(self, action):
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        xposafter = self.sim.data.qpos[0]

        forward_vel = (xposafter - xposbefore) / self.dt
        forward_reward = -1.0 * abs(forward_vel - self._goal_vel)
        ctrl_cost = 0.5 * 1e-1 * np.sum(np.square(action))

        observation = self._get_obs()
        reward = forward_reward - ctrl_cost
        if self.i >= 200:
            done = True
            self.i = 0
            print(self._goal_vel)
            del self.tasks[0]
            self._goal_vel = self.tasks[0].get('velocity', 0.0)
            self._goal = self._goal_vel
            
        else:
            done = False
            self.i += 1
        infos = dict(reward_forward=forward_reward,
            reward_ctrl=-ctrl_cost, task=self._task)
        return (observation, reward, done, infos)

    def sample_tasks(self, num_tasks):
        np.random.seed(666)
        print('goal sampled')
        velocities = np.random.choice([1.25,1.5], num_tasks)#np.random.uniform(0.0, 3.0, size=(num_tasks,))
        tasks = [{'velocity': velocity} for velocity in velocities]
        return tasks

    def get_all_task_idx(self):
        return range(len(self.tasks))

    def reset_task(self, idx):
        self._task = self.tasks[idx]
        self._goal_vel = self._task['velocity']
        self._goal = self._goal_vel
        self.reset()

In [4]:
env = HalfCheetahVelEnv(n_tasks = 5000000)#gym.make('BipedalWalker-v3')#FooEnv()#

meta_model = mSAC('MlpPolicy', env, verbose=1,policy_kwargs=dict(net_arch=[300, 300, 300], latent_dim = 5, hidden_sizes=[200,200,200]))#,learning_rate=0.0006)

for param in meta_model.actor.context_encoder.parameters():
    print(param.data)

meta_reward = []
meta_std = []

print('-Start-')
n_eval =30

meta_model_mean_reward_before, meta_model_std_reward_before = evaluate_meta_policy(meta_model, env, n_eval_episodes=30, add2buff = True)
meta_reward.append(meta_model_mean_reward_before)
meta_std.append(meta_model_std_reward_before)



print('##################################Start Learning##################################')
for i in range(200):
    
    meta_model.learn(total_timesteps=5*200)#, eval_freq=100, n_eval_episodes=5)
    meta_model_mean_reward, meta_model_std_reward = evaluate_meta_policy(meta_model, env, n_eval_episodes=n_eval)

    meta_reward.append(meta_model_mean_reward)
    meta_std.append(meta_model_std_reward)
    
    
    print('epoch:', i)
    print('meta_reward = ', meta_reward)
    print('meta_std = ', meta_std)
    
env.close()

goal sampled


  return torch._C._cuda_getDeviceCount() > 0


Using cpu device
Wrapping the env in a DummyVecEnv.
critic with  31
critic with  31
critic with  31
critic with  31
tensor([[ 0.0537,  0.0913,  0.1016,  ..., -0.1278,  0.1362,  0.1486],
        [-0.0029,  0.1456, -0.1328,  ..., -0.1428,  0.1556,  0.1592],
        [ 0.0502,  0.1107,  0.0090,  ..., -0.0497,  0.0032, -0.1174],
        ...,
        [ 0.0722,  0.0678,  0.0318,  ...,  0.1615, -0.1540,  0.0962],
        [-0.1173, -0.1402, -0.1299,  ...,  0.1444,  0.0846, -0.1017],
        [-0.1145, -0.0638, -0.0062,  ...,  0.1677, -0.0879,  0.1160]])
tensor([ 0.0213, -0.0235, -0.0045,  0.0180,  0.0009,  0.1305,  0.0564,  0.1421,
         0.0740, -0.1379, -0.0046,  0.0697, -0.1318, -0.0349,  0.0828,  0.0944,
         0.1098, -0.0579, -0.1336,  0.1232, -0.0062,  0.0809, -0.0710, -0.0821,
         0.0506, -0.0016, -0.1829, -0.1740, -0.0203,  0.0728,  0.0719, -0.1283,
         0.1463, -0.0662,  0.1097, -0.1368, -0.1147,  0.0393,  0.1382,  0.1829,
        -0.0557,  0.1467,  0.1531,  0.0893, -0.011

collect with prior
1.5
1.25
1.25
-----------------------------
| time/              |      |
|    episodes        | 4    |
|    fps             | 1132 |
|    time_elapsed    | 0    |
|    total timesteps | 804  |
-----------------------------
apply grads
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
epoch: 0
meta_reward =  [-274.91581210704345, -260.8228147446063]
meta_std =  [25.19121980635234, 52.63247580068557]
rollout
collect with prior
1.5
collect with prior
1.25
1.25
1.5
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 897          |
|    time_elapsed    | 0            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 3.9373205    |
|    actor_loss      | -0.619       |
|    avg. z          | 0.0009659996 |
|    avg. z var      | 0.43882483   |
|    

collect with prior
1.5
collect with prior
1.25
1.25
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 965           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.01573646    |
|    actor_loss      | 30.6          |
|    avg. z          | 5.0635448e-05 |
|    avg. z var      | 0.9898409     |
|    critic_loss     | 11.2          |
|    ent_coef        | 0.643         |
|    ent_coef_loss   | -3.95         |
|    learning_rate   | 0.0003        |
|    n_updates       | 1600          |
--------------------------------------
apply grads
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
epoch: 8
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19

collect with prior
1.5
1.5
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 1003          |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.012776657   |
|    actor_loss      | 61.1          |
|    avg. z          | -6.180401e-06 |
|    avg. z var      | 0.9936422     |
|    critic_loss     | 31.1          |
|    ent_coef        | 0.457         |
|    ent_coef_loss   | -6.47         |
|    learning_rate   | 0.0003        |
|    n_updates       | 2800          |
--------------------------------------
apply grads
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
epoch: 14
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.563252

apply grads
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
epoch: 19
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973]
meta_std =  [25.19121980635234, 52.63247580068557, 56.063545104874045, 54.08077979445108, 53.97525617275811, 52.75753755627775, 54.34410936771273, 55.332121462355964, 55.53156507538547, 56.15869451488047, 54.761914844806746, 57.431330295269845, 57.184168622607274, 57.38957484293908, 56.518049000254535, 56.93901821886115, 57.88493989054541, 57.84705396641816, 57.48270818092287, 56.156679314527985, 58.0

collect with prior
1.5
1.5
1.5
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 973            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.017637016    |
|    actor_loss      | 117            |
|    avg. z          | -2.5670332e-05 |
|    avg. z var      | 1.0003238      |
|    critic_loss     | 72.6           |
|    ent_coef        | 0.257          |
|    ent_coef_loss   | -10.5          |
|    learning_rate   | 0.0003         |
|    n_updates       | 4800           |
---------------------------------------
apply grads
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
epoch: 24
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.1967047488941

apply grads
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
epoch: 28
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354]
meta_std =  [25.19121980635234, 52.63247580068557, 56.063545104874045, 54.08077979445108, 53.97525617275811, 52.75753755627775, 54.34410936771273, 55.332121462355964, 55.53156507538547, 56.15869451488047, 54.761914844

apply grads
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.5
epoch: 32
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024]
meta_std =  [25.19121980635234, 52.63247580068557, 56.063545104874045, 54.08077979445108, 53.97525617275811, 52.75753755627775, 54.34410936

collect with prior
1.25
1.25
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 967           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.003909576   |
|    actor_loss      | 187           |
|    avg. z          | -5.295692e-06 |
|    avg. z var      | 0.9979851     |
|    critic_loss     | 135           |
|    ent_coef        | 0.129         |
|    ent_coef_loss   | -14.4         |
|    learning_rate   | 0.0003        |
|    n_updates       | 7200          |
--------------------------------------
apply grads
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.25
epoch: 36
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632

1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
epoch: 39
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987955197]
meta_std =  [25.19121980635234, 52.63247580068557, 56.063545104874045, 54.080779794

collect with prior
1.5
1.25
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 999            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.00071190536  |
|    actor_loss      | 228            |
|    avg. z          | -5.3897106e-06 |
|    avg. z var      | 0.9994626      |
|    critic_loss     | 182            |
|    ent_coef        | 0.086          |
|    ent_coef_loss   | -16.4          |
|    learning_rate   | 0.0003         |
|    n_updates       | 8600           |
---------------------------------------
apply grads
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
epoch: 43
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.196704748

collect with prior
1.5
1.25
1.5
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 966          |
|    time_elapsed    | 0            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 0.0021158797 |
|    actor_loss      | 246          |
|    avg. z          | 8.573117e-06 |
|    avg. z var      | 1.0012547    |
|    critic_loss     | 204          |
|    ent_coef        | 0.0725       |
|    ent_coef_loss   | -16.8        |
|    learning_rate   | 0.0003       |
|    n_updates       | 9200         |
-------------------------------------
apply grads
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
epoch: 46
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.

collect with prior
1.25
1.5
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 910            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.00071919966  |
|    actor_loss      | 264            |
|    avg. z          | -2.6409443e-06 |
|    avg. z var      | 0.9992384      |
|    critic_loss     | 228            |
|    ent_coef        | 0.0611         |
|    ent_coef_loss   | -17.3          |
|    learning_rate   | 0.0003         |
|    n_updates       | 9800           |
---------------------------------------
apply grads
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
epoch: 49
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.1967

collect with prior
1.25
1.25
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 932            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.005355692    |
|    actor_loss      | 280            |
|    avg. z          | -3.3968995e-06 |
|    avg. z var      | 1.000371       |
|    critic_loss     | 250            |
|    ent_coef        | 0.0514         |
|    ent_coef_loss   | -18.5          |
|    learning_rate   | 0.0003         |
|    n_updates       | 10400          |
---------------------------------------
apply grads
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
epoch: 52
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.1967047488

collect with prior
1.5
1.25
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 994           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.007866653   |
|    actor_loss      | 296           |
|    avg. z          | 6.8818264e-07 |
|    avg. z var      | 1.0013397     |
|    critic_loss     | 266           |
|    ent_coef        | 0.0431        |
|    ent_coef_loss   | -19.3         |
|    learning_rate   | 0.0003        |
|    n_updates       | 11000         |
--------------------------------------
apply grads
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
epoch: 55
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522

collect with prior
1.5
1.5
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 893           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00049893965 |
|    actor_loss      | 312           |
|    avg. z          | -9.599465e-06 |
|    avg. z var      | 0.9999566     |
|    critic_loss     | 286           |
|    ent_coef        | 0.0362        |
|    ent_coef_loss   | -19.6         |
|    learning_rate   | 0.0003        |
|    n_updates       | 11600         |
--------------------------------------
apply grads
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
epoch: 58
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.563

collect with prior
1.5
1.5
1.25
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 1001         |
|    time_elapsed    | 0            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 0.0007783586 |
|    actor_loss      | 329          |
|    avg. z          | 1.0366e-05   |
|    avg. z var      | 0.9998604    |
|    critic_loss     | 302          |
|    ent_coef        | 0.0306       |
|    ent_coef_loss   | -19.6        |
|    learning_rate   | 0.0003       |
|    n_updates       | 12200        |
-------------------------------------
apply grads
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
epoch: 61
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.134

collect with prior
1.25
1.5
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 975           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.0001829454  |
|    actor_loss      | 347           |
|    avg. z          | 4.1584353e-06 |
|    avg. z var      | 0.99991363    |
|    critic_loss     | 315           |
|    ent_coef        | 0.0258        |
|    ent_coef_loss   | -19.8         |
|    learning_rate   | 0.0003        |
|    n_updates       | 12800         |
--------------------------------------
apply grads
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
epoch: 64
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5

collect with prior
1.5
1.25
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 992            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.0004966538   |
|    actor_loss      | 363            |
|    avg. z          | -1.7190582e-06 |
|    avg. z var      | 0.9996769      |
|    critic_loss     | 349            |
|    ent_coef        | 0.0218         |
|    ent_coef_loss   | -20.3          |
|    learning_rate   | 0.0003         |
|    n_updates       | 13400          |
---------------------------------------
apply grads
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
epoch: 67
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.196704

1.25
epoch: 69
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987955197, -284.4311382104109, -279.25048678621056, -276.05985211773736, -278.47832821736006, -283.1864368767122, -272.6939753643122, -267.6947910

collect with prior
1.5
1.25
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 905            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.0006248897   |
|    actor_loss      | 388            |
|    avg. z          | 1.08991835e-05 |
|    avg. z var      | 1.0001643      |
|    critic_loss     | 360            |
|    ent_coef        | 0.0162         |
|    ent_coef_loss   | -22.6          |
|    learning_rate   | 0.0003         |
|    n_updates       | 14400          |
---------------------------------------
apply grads
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
epoch: 72
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.1967047488

apply grads
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
epoch: 74
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.593498795

collect with prior
1.25
1.5
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 1006          |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00053580315 |
|    actor_loss      | 413           |
|    avg. z          | 2.8171327e-05 |
|    avg. z var      | 0.999743      |
|    critic_loss     | 375           |
|    ent_coef        | 0.012         |
|    ent_coef_loss   | -24.8         |
|    learning_rate   | 0.0003        |
|    n_updates       | 15400         |
--------------------------------------
apply grads
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
epoch: 77
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.56325227

apply grads
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
epoch: 79
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987

apply grads
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
epoch: 81
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.59349879551

apply grads
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
epoch: 83
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987955

apply grads
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
epoch: 85
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.59349

apply grads
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
epoch: 87
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.593498795

collect with prior
1.25
1.25
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 1003           |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.00031908363  |
|    actor_loss      | 464            |
|    avg. z          | -6.8346494e-06 |
|    avg. z var      | 0.9998263      |
|    critic_loss     | 443            |
|    ent_coef        | 0.00586        |
|    ent_coef_loss   | -27.4          |
|    learning_rate   | 0.0003         |
|    n_updates       | 17800          |
---------------------------------------
apply grads
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
epoch: 89
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474

collect with prior
1.25
1.25
1.5
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 986            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.0010177412   |
|    actor_loss      | 473            |
|    avg. z          | -6.6351795e-06 |
|    avg. z var      | 1.0000987      |
|    critic_loss     | 451            |
|    ent_coef        | 0.00522        |
|    ent_coef_loss   | -27.4          |
|    learning_rate   | 0.0003         |
|    n_updates       | 18200          |
---------------------------------------
apply grads
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
epoch: 91
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.1967047

collect with prior
1.25
1.25
1.5
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 968            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.0004716555   |
|    actor_loss      | 480            |
|    avg. z          | -1.7836586e-05 |
|    avg. z var      | 0.99981755     |
|    critic_loss     | 455            |
|    ent_coef        | 0.00465        |
|    ent_coef_loss   | -27.5          |
|    learning_rate   | 0.0003         |
|    n_updates       | 18600          |
---------------------------------------
apply grads
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
epoch: 93
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474

collect with prior
1.25
1.25
1.5
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 988          |
|    time_elapsed    | 0            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 0.0006812446 |
|    actor_loss      | 488          |
|    avg. z          | 4.781135e-06 |
|    avg. z var      | 0.99989593   |
|    critic_loss     | 462          |
|    ent_coef        | 0.00414      |
|    ent_coef_loss   | -27.9        |
|    learning_rate   | 0.0003       |
|    n_updates       | 19000        |
-------------------------------------
apply grads
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
epoch: 95
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.

collect with prior
1.5
1.5
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 948            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.0006870986   |
|    actor_loss      | 495            |
|    avg. z          | -1.8752955e-05 |
|    avg. z var      | 0.999772       |
|    critic_loss     | 475            |
|    ent_coef        | 0.00369        |
|    ent_coef_loss   | -28.4          |
|    learning_rate   | 0.0003         |
|    n_updates       | 19400          |
---------------------------------------
apply grads
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.25
epoch: 97
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.1967047

collect with prior
1.25
1.5
1.5
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 922            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.0010196553   |
|    actor_loss      | 502            |
|    avg. z          | -3.7464522e-06 |
|    avg. z var      | 1.000138       |
|    critic_loss     | 506            |
|    ent_coef        | 0.00328        |
|    ent_coef_loss   | -29            |
|    learning_rate   | 0.0003         |
|    n_updates       | 19800          |
---------------------------------------
apply grads
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
epoch: 99
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.196704

collect with prior
1.25
1.5
1.5
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 976            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.0011509139   |
|    actor_loss      | 509            |
|    avg. z          | -1.5862599e-05 |
|    avg. z var      | 0.9996703      |
|    critic_loss     | 512            |
|    ent_coef        | 0.00292        |
|    ent_coef_loss   | -28.5          |
|    learning_rate   | 0.0003         |
|    n_updates       | 20200          |
---------------------------------------
apply grads
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
epoch: 101
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.196704748

collect with prior
1.25
1.25
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 980           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.0015030808  |
|    actor_loss      | 515           |
|    avg. z          | 3.1823533e-06 |
|    avg. z var      | 1.00011       |
|    critic_loss     | 510           |
|    ent_coef        | 0.0026        |
|    ent_coef_loss   | -28.8         |
|    learning_rate   | 0.0003        |
|    n_updates       | 20600         |
--------------------------------------
apply grads
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.5
epoch: 103
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.563

collect with prior
1.5
1.5
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 980            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.00085767097  |
|    actor_loss      | 521            |
|    avg. z          | -5.9591953e-06 |
|    avg. z var      | 0.9997272      |
|    critic_loss     | 539            |
|    ent_coef        | 0.00232        |
|    ent_coef_loss   | -29.1          |
|    learning_rate   | 0.0003         |
|    n_updates       | 21000          |
---------------------------------------
apply grads
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
epoch: 105
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889

collect with prior
1.5
1.5
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 990           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00038663525 |
|    actor_loss      | 526           |
|    avg. z          | 5.79955e-06   |
|    avg. z var      | 0.999863      |
|    critic_loss     | 553           |
|    ent_coef        | 0.00206       |
|    ent_coef_loss   | -28.8         |
|    learning_rate   | 0.0003        |
|    n_updates       | 21400         |
--------------------------------------
apply grads
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.25
epoch: 107
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632

collect with prior
1.5
1.5
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 992            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.00055222685  |
|    actor_loss      | 532            |
|    avg. z          | -6.9802795e-06 |
|    avg. z var      | 0.9999661      |
|    critic_loss     | 546            |
|    ent_coef        | 0.00184        |
|    ent_coef_loss   | -29.2          |
|    learning_rate   | 0.0003         |
|    n_updates       | 21800          |
---------------------------------------
apply grads
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
epoch: 109
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.1967047488

collect with prior
1.25
1.25
1.5
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 965            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.00027534657  |
|    actor_loss      | 537            |
|    avg. z          | -1.2297695e-06 |
|    avg. z var      | 0.9998609      |
|    critic_loss     | 562            |
|    ent_coef        | 0.00164        |
|    ent_coef_loss   | -28.8          |
|    learning_rate   | 0.0003         |
|    n_updates       | 22200          |
---------------------------------------
apply grads
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
epoch: 111
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.196704

collect with prior
1.25
1.5
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 947           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00063135533 |
|    actor_loss      | 542           |
|    avg. z          | 1.0292746e-05 |
|    avg. z var      | 0.99976784    |
|    critic_loss     | 580           |
|    ent_coef        | 0.00146       |
|    ent_coef_loss   | -28.8         |
|    learning_rate   | 0.0003        |
|    n_updates       | 22600         |
--------------------------------------
apply grads
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
epoch: 113
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.563252

collect with prior
1.25
1.25
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 981            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.002935648    |
|    actor_loss      | 547            |
|    avg. z          | -1.4271364e-05 |
|    avg. z var      | 0.9998006      |
|    critic_loss     | 595            |
|    ent_coef        | 0.0013         |
|    ent_coef_loss   | -29.5          |
|    learning_rate   | 0.0003         |
|    n_updates       | 23000          |
---------------------------------------
apply grads
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
epoch: 115
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670

collect with prior
1.5
1.25
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 982           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00020736897 |
|    actor_loss      | 552           |
|    avg. z          | 2.6651173e-06 |
|    avg. z var      | 1.0000368     |
|    critic_loss     | 604           |
|    ent_coef        | 0.00116       |
|    ent_coef_loss   | -30.4         |
|    learning_rate   | 0.0003        |
|    n_updates       | 23400         |
--------------------------------------
apply grads
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
epoch: 117
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.56325

collect with prior
1.25
1.5
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 979           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00018854397 |
|    actor_loss      | 557           |
|    avg. z          | -9.663668e-06 |
|    avg. z var      | 0.9999052     |
|    critic_loss     | 630           |
|    ent_coef        | 0.00103       |
|    ent_coef_loss   | -30.4         |
|    learning_rate   | 0.0003        |
|    n_updates       | 23800         |
--------------------------------------
apply grads
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
epoch: 119
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.56

collect with prior
1.5
1.5
1.5
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 889            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.00035629157  |
|    actor_loss      | 560            |
|    avg. z          | -2.9667342e-06 |
|    avg. z var      | 0.9998606      |
|    critic_loss     | 619            |
|    ent_coef        | 0.000916       |
|    ent_coef_loss   | -29.7          |
|    learning_rate   | 0.0003         |
|    n_updates       | 24200          |
---------------------------------------
apply grads
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
epoch: 121
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.1967047

collect with prior
1.5
1.5
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 986            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.0001888731   |
|    actor_loss      | 565            |
|    avg. z          | -4.5211627e-06 |
|    avg. z var      | 0.9999532      |
|    critic_loss     | 594            |
|    ent_coef        | 0.000816       |
|    ent_coef_loss   | -30.6          |
|    learning_rate   | 0.0003         |
|    n_updates       | 24600          |
---------------------------------------
apply grads
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
epoch: 123
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.196704748

collect with prior
1.25
1.25
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 984            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.0011031803   |
|    actor_loss      | 569            |
|    avg. z          | -2.6497733e-07 |
|    avg. z var      | 1.000066       |
|    critic_loss     | 623            |
|    ent_coef        | 0.000725       |
|    ent_coef_loss   | -31.6          |
|    learning_rate   | 0.0003         |
|    n_updates       | 25000          |
---------------------------------------
apply grads
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
epoch: 125
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.196704

collect with prior
1.25
1.5
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 977           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00058453524 |
|    actor_loss      | 573           |
|    avg. z          | -5.76071e-06  |
|    avg. z var      | 0.9997783     |
|    critic_loss     | 624           |
|    ent_coef        | 0.000643      |
|    ent_coef_loss   | -31.3         |
|    learning_rate   | 0.0003        |
|    n_updates       | 25400         |
--------------------------------------
apply grads
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
epoch: 127
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.56325227

collect with prior
1.25
1.25
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 918           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00012258181 |
|    actor_loss      | 576           |
|    avg. z          | 4.1618733e-07 |
|    avg. z var      | 0.99990684    |
|    critic_loss     | 595           |
|    ent_coef        | 0.000572      |
|    ent_coef_loss   | -31.3         |
|    learning_rate   | 0.0003        |
|    n_updates       | 25800         |
--------------------------------------
apply grads
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
epoch: 129
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.56

collect with prior
1.5
1.5
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 955           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.0006188523  |
|    actor_loss      | 580           |
|    avg. z          | -2.682355e-06 |
|    avg. z var      | 0.9997464     |
|    critic_loss     | 602           |
|    ent_coef        | 0.000509      |
|    ent_coef_loss   | -31.7         |
|    learning_rate   | 0.0003        |
|    n_updates       | 26200         |
--------------------------------------
apply grads
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
epoch: 131
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632

collect with prior
1.25
1.5
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 988           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00026720378 |
|    actor_loss      | 584           |
|    avg. z          | 3.0167196e-06 |
|    avg. z var      | 0.99988997    |
|    critic_loss     | 642           |
|    ent_coef        | 0.000453      |
|    ent_coef_loss   | -31.5         |
|    learning_rate   | 0.0003        |
|    n_updates       | 26600         |
--------------------------------------
apply grads
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
epoch: 133
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.56325227

collect with prior
1.25
1.5
1.25
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 888          |
|    time_elapsed    | 0            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 0.0004514482 |
|    actor_loss      | 587          |
|    avg. z          | 3.750464e-06 |
|    avg. z var      | 0.99989647   |
|    critic_loss     | 642          |
|    ent_coef        | 0.000404     |
|    ent_coef_loss   | -30.8        |
|    learning_rate   | 0.0003       |
|    n_updates       | 27000        |
-------------------------------------
apply grads
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
epoch: 135
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1

collect with prior
1.5
1.5
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 985           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00065440923 |
|    actor_loss      | 590           |
|    avg. z          | -3.442477e-06 |
|    avg. z var      | 0.9999386     |
|    critic_loss     | 665           |
|    ent_coef        | 0.00036       |
|    ent_coef_loss   | -31.1         |
|    learning_rate   | 0.0003        |
|    n_updates       | 27400         |
--------------------------------------
apply grads
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
epoch: 137
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -2

collect with prior
1.5
1.5
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 976           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00010749249 |
|    actor_loss      | 592           |
|    avg. z          | 6.035068e-07  |
|    avg. z var      | 0.9999153     |
|    critic_loss     | 654           |
|    ent_coef        | 0.000322      |
|    ent_coef_loss   | -29.5         |
|    learning_rate   | 0.0003        |
|    n_updates       | 27800         |
--------------------------------------
apply grads
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
epoch: 139
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717

collect with prior
1.25
1.5
1.5
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 971            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.00030702862  |
|    actor_loss      | 596            |
|    avg. z          | -1.0668879e-06 |
|    avg. z var      | 0.9998324      |
|    critic_loss     | 658            |
|    ent_coef        | 0.000287       |
|    ent_coef_loss   | -31            |
|    learning_rate   | 0.0003         |
|    n_updates       | 28200          |
---------------------------------------
apply grads
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
epoch: 141
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474

collect with prior
1.5
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 986         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 0.001113859 |
|    actor_loss      | 599         |
|    avg. z          | 3.80488e-06 |
|    avg. z var      | 1.0002989   |
|    critic_loss     | 635         |
|    ent_coef        | 0.000256    |
|    ent_coef_loss   | -29.5       |
|    learning_rate   | 0.0003      |
|    n_updates       | 28600       |
------------------------------------
apply grads
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
epoch: 143
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651,

collect with prior
1.5
1.25
1.5
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 919            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.00027146807  |
|    actor_loss      | 601            |
|    avg. z          | -3.1839763e-06 |
|    avg. z var      | 1.0001183      |
|    critic_loss     | 658            |
|    ent_coef        | 0.000229       |
|    ent_coef_loss   | -30.1          |
|    learning_rate   | 0.0003         |
|    n_updates       | 29000          |
---------------------------------------
apply grads
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.25
epoch: 145
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474

collect with prior
1.25
1.5
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 916           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00037723847 |
|    actor_loss      | 603           |
|    avg. z          | 1.0639432e-06 |
|    avg. z var      | 0.999669      |
|    critic_loss     | 686           |
|    ent_coef        | 0.000204      |
|    ent_coef_loss   | -28.9         |
|    learning_rate   | 0.0003        |
|    n_updates       | 29400         |
--------------------------------------
apply grads
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
epoch: 147
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.56325

collect with prior
1.5
1.25
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 941           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00012844171 |
|    actor_loss      | 606           |
|    avg. z          | 9.204571e-06  |
|    avg. z var      | 0.9999205     |
|    critic_loss     | 700           |
|    ent_coef        | 0.000183      |
|    ent_coef_loss   | -29.1         |
|    learning_rate   | 0.0003        |
|    n_updates       | 29800         |
--------------------------------------
apply grads
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
epoch: 149
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.56

collect with prior
1.5
1.25
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 1005          |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00021197913 |
|    actor_loss      | 608           |
|    avg. z          | 4.881018e-06  |
|    avg. z var      | 0.9999908     |
|    critic_loss     | 691           |
|    ent_coef        | 0.000164      |
|    ent_coef_loss   | -28.1         |
|    learning_rate   | 0.0003        |
|    n_updates       | 30200         |
--------------------------------------
apply grads
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
epoch: 151
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.56

collect with prior
1.25
1.5
1.5
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 972            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.000120928955 |
|    actor_loss      | 611            |
|    avg. z          | 2.6961222e-06  |
|    avg. z var      | 0.9999526      |
|    critic_loss     | 687            |
|    ent_coef        | 0.000146       |
|    ent_coef_loss   | -27.7          |
|    learning_rate   | 0.0003         |
|    n_updates       | 30600          |
---------------------------------------
apply grads
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
epoch: 153
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670

collect with prior
1.5
1.25
1.5
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 959            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.00045699265  |
|    actor_loss      | 613            |
|    avg. z          | -3.8187986e-06 |
|    avg. z var      | 0.999985       |
|    critic_loss     | 701            |
|    ent_coef        | 0.000131       |
|    ent_coef_loss   | -26.1          |
|    learning_rate   | 0.0003         |
|    n_updates       | 31000          |
---------------------------------------
apply grads
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
epoch: 155
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.1967

collect with prior
1.5
1.25
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 994           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.0004325537  |
|    actor_loss      | 613           |
|    avg. z          | 1.2050199e-06 |
|    avg. z var      | 0.9998883     |
|    critic_loss     | 753           |
|    ent_coef        | 0.000118      |
|    ent_coef_loss   | -24.5         |
|    learning_rate   | 0.0003        |
|    n_updates       | 31400         |
--------------------------------------
apply grads
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
epoch: 157
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.56325227

collect with prior
1.5
1.5
1.25
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 907          |
|    time_elapsed    | 0            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 0.0011994211 |
|    actor_loss      | 616          |
|    avg. z          | 2.2254e-07   |
|    avg. z var      | 0.9996934    |
|    critic_loss     | 756          |
|    ent_coef        | 0.000106     |
|    ent_coef_loss   | -23.8        |
|    learning_rate   | 0.0003       |
|    n_updates       | 31800        |
-------------------------------------
apply grads
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
epoch: 159
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273

collect with prior
1.5
1.25
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 892            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.0007400091   |
|    actor_loss      | 617            |
|    avg. z          | -4.2327356e-06 |
|    avg. z var      | 1.0003116      |
|    critic_loss     | 720            |
|    ent_coef        | 9.54e-05       |
|    ent_coef_loss   | -24.1          |
|    learning_rate   | 0.0003         |
|    n_updates       | 32200          |
---------------------------------------
apply grads
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
epoch: 161
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.196704

1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
epoch: 162
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987955197, -284.4311382104109, -279.25048678621056, -276.05985211773736, -278

collect with prior
1.5
1.25
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 985           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.0003370624  |
|    actor_loss      | 619           |
|    avg. z          | 4.4056296e-07 |
|    avg. z var      | 1.0001298     |
|    critic_loss     | 747           |
|    ent_coef        | 8.05e-05      |
|    ent_coef_loss   | -25.4         |
|    learning_rate   | 0.0003        |
|    n_updates       | 32800         |
--------------------------------------
apply grads
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
epoch: 164
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.56

apply grads
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
epoch: 165
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987955

collect with prior
1.5
1.25
1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 943           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 6.7216104e-05 |
|    actor_loss      | 621           |
|    avg. z          | -7.609263e-06 |
|    avg. z var      | 1.0000352     |
|    critic_loss     | 706           |
|    ent_coef        | 6.8e-05       |
|    ent_coef_loss   | -22.8         |
|    learning_rate   | 0.0003        |
|    n_updates       | 33400         |
--------------------------------------
apply grads
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.5
epoch: 167
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5

apply grads
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
epoch: 168
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.593498

collect with prior
1.25
1.5
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 914           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.0004735239  |
|    actor_loss      | 621           |
|    avg. z          | 1.1256776e-06 |
|    avg. z var      | 1.0002878     |
|    critic_loss     | 751           |
|    ent_coef        | 5.82e-05      |
|    ent_coef_loss   | -21.6         |
|    learning_rate   | 0.0003        |
|    n_updates       | 34000         |
--------------------------------------
apply grads
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
epoch: 170
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.563252

apply grads
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
epoch: 171
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.59349

collect with prior
1.5
1.5
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 910            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.00021767702  |
|    actor_loss      | 624            |
|    avg. z          | -1.9893172e-05 |
|    avg. z var      | 1.0000376      |
|    critic_loss     | 790            |
|    ent_coef        | 4.94e-05       |
|    ent_coef_loss   | -21.4          |
|    learning_rate   | 0.0003         |
|    n_updates       | 34600          |
---------------------------------------
apply grads
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
epoch: 173
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19

apply grads
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
epoch: 174
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.593498795

collect with prior
1.5
1.5
1.5
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 1032          |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00016676095 |
|    actor_loss      | 625           |
|    avg. z          | 2.6961412e-05 |
|    avg. z var      | 0.99993503    |
|    critic_loss     | 778           |
|    ent_coef        | 4.2e-05       |
|    ent_coef_loss   | -20.9         |
|    learning_rate   | 0.0003        |
|    n_updates       | 35200         |
--------------------------------------
apply grads
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
epoch: 176
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.56

apply grads
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
epoch: 177
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934

collect with prior
1.5
1.25
1.25
---------------------------------------
| time/              |                |
|    episodes        | 4              |
|    fps             | 952            |
|    time_elapsed    | 0              |
|    total timesteps | 804            |
| train/             |                |
|    KL_loss         | 0.00023654137  |
|    actor_loss      | 626            |
|    avg. z          | -1.3261553e-06 |
|    avg. z var      | 0.99970543     |
|    critic_loss     | 757            |
|    ent_coef        | 3.56e-05       |
|    ent_coef_loss   | -20            |
|    learning_rate   | 0.0003         |
|    n_updates       | 35800          |
---------------------------------------
apply grads
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
epoch: 179
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.1967047488941

apply grads
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
epoch: 180
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987

1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
epoch: 181
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987955197, -284.4311382104109, -279.250486

1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
epoch: 182
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987955197, -284.43113821041

apply grads
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
epoch: 183
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987

apply grads
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
epoch: 184
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.593498

apply grads
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
epoch: 185
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987955

apply grads
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
epoch: 186
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987

apply grads
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
epoch: 187
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987

apply grads
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
epoch: 188
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.59349

apply grads
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
epoch: 189
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.59349879

apply grads
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
epoch: 190
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934987

apply grads
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
epoch: 191
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.59349

apply grads
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
epoch: 192
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.593498795

apply grads
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
epoch: 193
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5934

apply grads
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
epoch: 194
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.593498795

apply grads
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
epoch: 195
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.5

apply grads
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
epoch: 196
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.59349879

apply grads
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
epoch: 197
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.59349879

apply grads
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
epoch: 198
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.1346732879651, -274.32504621616823, -278.32417160854095, -271.8044181260458, -284.07085945080365, -281.7830730813995, -285.3829438469659, -278.9314444333444, -282.2347917231543, -287.640893382687, -287.54471477211337, -285.7561362295773, -278.12707787605797, -289.1571990806973, -278.1233008318172, -283.8094174178629, -279.06234872628255, -279.57373640355297, -289.58992845903, -280.76264409919764, -288.9579400269795, -291.48002402135836, -286.97702395453354, -295.1262330668082, -282.58813543503436, -287.5323365461664, -292.29972384409024, -285.91836933233725, -291.38242196980775, -283.8325901199532, -287.3390759513386, -290.9808941485731, -275.23548189086245, -284.59349879551

1.25
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 976           |
|    time_elapsed    | 0             |
|    total timesteps | 804           |
| train/             |               |
|    KL_loss         | 0.00017237914 |
|    actor_loss      | 632           |
|    avg. z          | 1.3763234e-06 |
|    avg. z var      | 0.9999461     |
|    critic_loss     | 795           |
|    ent_coef        | 1.19e-05      |
|    ent_coef_loss   | -14.6         |
|    learning_rate   | 0.0003        |
|    n_updates       | 39800         |
--------------------------------------
apply grads
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
epoch: 199
meta_reward =  [-274.91581210704345, -260.8228147446063, -280.9178398885859, -267.1447384518852, -264.6759388804936, -258.19670474889415, -266.5632522717941, -273.134673287965

In [5]:
# since last time the sampling in evaluation and the sampling depth für context in train

In [6]:
a = [0.7860740250467451, 1.3779506616437, 1.5551784617926114, 0.7858287766954339, 2.928255854632301, 2.1984436580714473, 0.3458226800494484, 1.1588252059030784, 1.885503538619135, 1.3296746059353843, 2.3686750260717098, 2.38235572737476, 2.1685728538042, 2.711781597991056, 1.999393957709451, 1.8307119965196041, 1.260624986955272, 0.2671352016074815, 1.402548298157229, 2.4971121092039477, 2.279859532934837, 2.839511756498454, 0.37517377900679827, 0.5634252162792469, 1.1076871867784575, 0.3762089882646701, 0.9730737106816765, 1.59509821076111, 2.73336720468094, 1.7527743843309642, 1.248311818712994, 1.4736872317976357, 1.3468935565348823, 1.1823408005585798, 2.107571159141275, 2.731734949135858, 0.37990433907251275, 0.7641724093376423, 1.512021117484312, 1.0545722993955051, 1.6782903704242598, 2.8169478016378617, 1.2892761911000037, 1.4989447841160373, 2.992167429662661, 0.47605191646339684, 0.8343795584830792, 0.9630016215605014, 0.5557528352161346, 0.008703090506129718, 0.48863259980930185, 0.5975857056152952, 0.7963510149605818, 0.864501443436107, 0.047374356635443315, 1.2762439954335498, 2.010656574480702, 2.9203781981030765, 2.100895260148338, 2.3680082550723363, 1.6935569384236153, 2.1718436939592927, 2.728960574484791, 0.7084476270060592, 1.480194734343588, 1.022807914959746, 1.9006458178251902, 1.532128187898675, 1.598488390876416, 1.403537916973332, 2.979915119672488, 2.2720545866630646, 0.5357060314905813, 1.474444589494259, 0.48635227727992836, 1.5568759511574917, 2.2805153214175724, 2.0142083586378923, 2.3757763016921394, 2.8720776710137823, 2.195070206002873, 2.079658332603392, 2.9186169896064182, 2.1386625004738615, 0.17603797674033772, 0.5324039546451034, 1.124502553740213, 1.0493076380999455, 2.379069097887579, 0.5713365770889663, 2.9506458154309234, 2.6917428928705496, 0.7125322952456615, 1.0837847172522923, 1.2394239042739765, 1.531412840477841, 1.188193847652982, 2.5079036400976444, 0.043133571081973576, 0.03269028711044775]

In [7]:
len(a)

100

In [8]:
a =[0.7860740250467451, 1.3779506616437, 1.5551784617926114, 0.7858287766954339, 2.928255854632301, 2.1984436580714473, 0.3458226800494484, 1.1588252059030784, 1.885503538619135, 1.3296746059353843, 2.3686750260717098, 2.38235572737476, 2.1685728538042, 2.711781597991056, 2.9784953809599912, 2.096870367728471, 2.04652104928077, 1.999393957709451, 1.8307119965196041, 1.4436558931536259, 1.260624986955272, 1.8408117163039968, 1.0474247818307383, 0.2893499571670589, 0.2671352016074815, 1.402548298157229, 2.4971121092039477, 2.279859532934837, 2.839511756498454, 0.37517377900679827, 0.5634252162792469, 1.1076871867784575, 2.7450762138519322, 2.176699864999294, 0.3762089882646701, 0.9730737106816765, 1.59509821076111, 2.73336720468094, 2.050959316523306, 2.809525172141024, 1.7527743843309642, 1.248311818712994, 1.4736872317976357, 1.3468935565348823, 1.1823408005585798, 2.107571159141275, 2.731734949135858, 0.37990433907251275, 0.7641724093376423, 1.512021117484312, 1.0545722993955051, 1.6782903704242598, 2.8169478016378617, 1.2892761911000037, 0.8861106117834646, 1.4989447841160373, 2.992167429662661, 0.7015287662209706, 0.11168961358780638, 0.47605191646339684, 0.8343795584830792, 0.9630016215605014, 0.5557528352161346, 0.008703090506129718, 0.48863259980930185, 0.5975857056152952, 0.7963510149605818, 0.864501443436107, 0.047374356635443315, 1.2762439954335498, 2.010656574480702, 2.9203781981030765, 2.100895260148338, 2.3680082550723363, 1.6935569384236153, 2.1718436939592927, 2.728960574484791, 0.7084476270060592, 1.480194734343588, 0.09577530004016699, 1.022807914959746, 1.9006458178251902, 1.532128187898675, 1.598488390876416, 1.454276543853031, 1.403537916973332, 1.071058434727445, 2.979915119672488, 2.2720545866630646, 0.5357060314905813, 1.474444589494259, 0.48635227727992836, 1.5568759511574917, 2.2805153214175724, 2.0142083586378923, 2.3757763016921394, 2.2093259883723895, 0.7794378763219836, 2.8720776710137823, 2.195070206002873, 2.079658332603392, 2.9186169896064182, 2.1386625004738615, 2.802629411145565, 0.17603797674033772, 0.5324039546451034, 1.124502553740213, 1.0493076380999455, 1.744195890973208, 0.5583223214918457, 2.379069097887579, 0.08306914545160993, 0.5713365770889663, 1.2224346928609924, 1.1705025304184877, 0.14470529568442736, 2.9506458154309234, 2.6917428928705496, 2.5617488625541105, 2.317384380319182, 0.7125322952456615, 0.3765232265537465, 1.0837847172522923, 1.2394239042739765, 1.531412840477841, 1.188193847652982, 2.5079036400976444, 0.043133571081973576, 0.03269028711044775, 0.7144484446282852]