In [1]:
import gym

from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3 import mSAC
from stable_baselines3.common.evaluation import evaluate_policy, evaluate_meta_policy

import numpy as np
import torch as th
from gym import spaces

##pyfly stuff
from pyfly.pyfly import PyFly
from pyfly.pid_controller import PIDController
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as p3
import matplotlib.animation as animation

In [2]:
# Seeds for the global PyTorch and NumPy random number generators, set once
# up front so that code drawing from those global streams is repeatable.
TORCH_SEED = 42
NUMPY_SEED = 666

th.manual_seed(TORCH_SEED)
np.random.seed(NUMPY_SEED)

In [3]:
import numpy as np
from gym.envs.mujoco import HalfCheetahEnv as HalfCheetahEnv_

class HalfCheetahEnv(HalfCheetahEnv_):
    """Half-cheetah variant with a custom observation, camera and renderer.

    Compared to the base class this overrides:
      * ``_get_obs``     -- builds the observation from the joint positions
                            (first entry dropped), the joint velocities and
                            the centre of mass of the ``"torso"`` body.
      * ``viewer_setup`` -- points the viewer at the model camera ``'track'``.
      * ``render``       -- supports the ``'human'`` and ``'rgb_array'`` modes.
    """

    def _get_obs(self):
        """Return the current observation as a flat ``float32`` array."""
        sim_data = self.sim.data
        pieces = [
            sim_data.qpos.flat[1:],           # every position entry but the first
            sim_data.qvel.flat,               # all velocity entries
            self.get_body_com("torso").flat,  # torso centre of mass
        ]
        stacked = np.concatenate(pieces)
        return stacked.astype(np.float32).flatten()

    def viewer_setup(self):
        """Configure the viewer to use the model camera named ``'track'``."""
        track_cam_id = self.model.camera_name2id('track')
        viewer = self.viewer
        # Camera type 2 together with ``fixedcamid`` selects a camera defined
        # in the model -- presumably MuJoCo's "fixed" camera type; confirm.
        viewer.cam.type = 2
        viewer.cam.fixedcamid = track_cam_id
        viewer.cam.distance = self.model.stat.extent * 0.35
        # Hide the overlay
        viewer._hide_overlay = True

    def render(self, mode='human'):
        """Render the scene.

        ``'human'`` draws through the viewer and returns ``None``;
        ``'rgb_array'`` draws and returns the pixels read back from the
        viewer at 500x500. Any other mode does nothing and returns ``None``.
        """
        if mode == 'human':
            self._get_viewer().render()
            return None
        if mode == 'rgb_array':
            self._get_viewer().render()
            # window size used for old mujoco-py:
            frame_width, frame_height = 500, 500
            pixels = self._get_viewer().read_pixels(
                frame_width, frame_height, depth=False)
            return pixels
        return None
            
import numpy as np



class HalfCheetahVelEnv(HalfCheetahEnv):
    """Half-cheetah environment with target velocity, as described in [1]. The
    code is adapted from
    https://github.com/cbfinn/maml_rl/blob/9c8e2ebd741cb0c7b8bf2d040c4caeeb8e06cc95/rllab/envs/mujoco/half_cheetah_env_rand.py
    The half-cheetah follows the dynamics from MuJoCo [2], and receives at each
    time step a reward composed of a control cost and a penalty equal to the
    difference between its current velocity and the target velocity.

    Tasks: a fixed-seed sequence of ``n_tasks`` target velocities is generated
    up front, each drawn uniformly from the two values ``{1.25, 1.5}`` (the
    earlier description of this class mentioned a uniform distribution on
    [0, 2]; that is not what this code samples). The environment works through
    that sequence on its own: every time an episode ends, the current task is
    removed from the front of ``self.tasks`` and the next one becomes active.

    Episode length: ``step`` reports ``done`` when the internal step counter
    has reached ``EPISODE_STEP_LIMIT``; the check happens before the counter
    is incremented, so an episode that starts with the counter at 0 lasts
    ``EPISODE_STEP_LIMIT + 1`` steps.
    NOTE(review): this timing is kept as-is because training code may rely on
    it -- confirm whether exactly ``EPISODE_STEP_LIMIT`` steps were intended.

    [1] Chelsea Finn, Pieter Abbeel, Sergey Levine, "Model-Agnostic
        Meta-Learning for Fast Adaptation of Deep Networks", 2017
        (https://arxiv.org/abs/1703.03400)
    [2] Emanuel Todorov, Tom Erez, Yuval Tassa, "MuJoCo: A physics engine for
        model-based control", 2012
        (https://homes.cs.washington.edu/~todorov/papers/TodorovIROS12.pdf)
    """

    # Step-counter value at which ``step`` ends the episode.
    EPISODE_STEP_LIMIT = 200
    # Seed of the private generator used to draw the task sequence.
    TASK_SEED = 1337

    def __init__(self, task=None, n_tasks=30000, randomize_tasks=True):
        """Create the environment and pre-sample its task sequence.

        Args:
            task: Optional task dict reported under ``infos['task']``.
                Defaults to a fresh empty dict per instance.
            n_tasks: Number of target velocities to pre-sample. Must be at
                least 1, since the first task is activated immediately.
            randomize_tasks: Accepted for interface compatibility; currently
                unused.
        """
        # ``None`` sentinel instead of a ``{}`` default so that instances do
        # not share (and mutate) one dict object.
        self._task = {} if task is None else task
        self.tasks = self.sample_tasks(n_tasks)
        self._goal_vel = self.tasks[0].get('velocity', 0.0)
        self._goal = self._goal_vel
        self.i = 0  # steps taken in the current episode
        # The attributes above are set before the base constructor runs --
        # presumably because it already calls ``step`` once; confirm.
        super(HalfCheetahVelEnv, self).__init__()

    def step(self, action):
        """Advance the simulation by one control step.

        Args:
            action: Control input forwarded to ``do_simulation``.

        Returns:
            ``(observation, reward, done, infos)`` where ``reward`` is the
            negative absolute velocity error minus the control cost, and
            ``infos`` holds ``reward_forward``, ``reward_ctrl`` and ``task``.
        """
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        xposafter = self.sim.data.qpos[0]

        forward_vel = (xposafter - xposbefore) / self.dt
        forward_reward = -1.0 * abs(forward_vel - self._goal_vel)
        ctrl_cost = 0.5 * 1e-1 * np.sum(np.square(action))

        observation = self._get_obs()
        reward = forward_reward - ctrl_cost

        done = self.i >= self.EPISODE_STEP_LIMIT
        if done:
            self.i = 0
            print(self._goal_vel)
            self._advance_task()
        else:
            self.i += 1

        # NOTE(review): ``self._task`` is only updated by ``reset_task``, not
        # by the automatic task rotation above, so ``infos['task']`` may not
        # describe the active goal -- confirm whether consumers rely on it.
        infos = dict(reward_forward=forward_reward,
            reward_ctrl=-ctrl_cost, task=self._task)
        return (observation, reward, done, infos)

    def _advance_task(self):
        """Drop the finished task and activate the next one in the queue.

        When only one task is left it is kept and reused, so an exhausted
        queue no longer raises ``IndexError`` in the middle of a rollout.
        """
        if len(self.tasks) > 1:
            del self.tasks[0]
        self._goal_vel = self.tasks[0].get('velocity', 0.0)
        self._goal = self._goal_vel

    def sample_tasks(self, num_tasks):
        """Return ``num_tasks`` task dicts of the form ``{'velocity': v}``.

        The velocities are drawn from ``{1.25, 1.5}`` with a private
        ``RandomState`` seeded with ``TASK_SEED``. This produces the same
        sequence as seeding the global NumPy generator with that value, but
        leaves the global generator (and whatever seed the caller set on it)
        untouched.
        """
        rng = np.random.RandomState(self.TASK_SEED)
        print('goal sampled')
        velocities = rng.choice([1.25, 1.5], num_tasks)
        tasks = [{'velocity': velocity} for velocity in velocities]
        return tasks

    def get_all_task_idx(self):
        """Return the valid indices into the remaining task list."""
        return range(len(self.tasks))

    def reset_task(self, idx):
        """Activate the task at position ``idx`` and reset the environment.

        Note that the per-episode step counter is not touched here.
        """
        self._task = self.tasks[idx]
        self._goal_vel = self._task['velocity']
        self._goal = self._goal_vel
        self.reset()

In [4]:
# --- Experiment settings -----------------------------------------------------
N_TASKS = 5000000               # target velocities pre-sampled by the environment
N_EPOCHS = 200                  # learn/evaluate rounds
TIMESTEPS_PER_EPOCH = 5 * 200   # budget passed to each ``learn`` call
n_eval = 5                      # episodes for the evaluation before training
N_EVAL_EPISODES = 30            # episodes for the evaluation after each epoch

env = HalfCheetahVelEnv(n_tasks=N_TASKS)

# Everything after the environment exists runs inside try/finally so that the
# environment is closed even when training fails or the cell is interrupted.
try:
    meta_model = mSAC(
        'MlpPolicy',
        env,
        verbose=1,
        policy_kwargs=dict(
            net_arch=[300, 300, 300],
            latent_dim=1,
            hidden_sizes=[200, 200, 200],
        ),
    )

    # Dump the freshly initialised context-encoder parameters for inspection.
    for param in meta_model.actor.context_encoder.parameters():
        print(param.data)

    # Evaluation history: index 0 is the untrained policy, followed by one
    # entry per completed epoch.
    meta_reward = []
    meta_std = []

    print('-Start-')

    # Baseline evaluation before any training. ``add2buff=True`` presumably
    # also stores the collected transitions in the replay buffer -- confirm
    # against ``evaluate_meta_policy``.
    meta_model_mean_reward_before, meta_model_std_reward_before = evaluate_meta_policy(
        meta_model, env, n_eval_episodes=n_eval, add2buff=True)
    meta_reward.append(meta_model_mean_reward_before)
    meta_std.append(meta_model_std_reward_before)

    print('##################################Start Learning##################################')
    for i in range(N_EPOCHS):
        meta_model.learn(total_timesteps=TIMESTEPS_PER_EPOCH)
        meta_model_mean_reward, meta_model_std_reward = evaluate_meta_policy(
            meta_model, env, n_eval_episodes=N_EVAL_EPISODES)

        meta_reward.append(meta_model_mean_reward)
        meta_std.append(meta_model_std_reward)

        # The full history is printed in copy-pasteable form after every
        # epoch so the results survive an interrupted run.
        print('epoch:', i)
        print('meta_reward = ', meta_reward)
        print('meta_std = ', meta_std)
finally:
    env.close()

goal sampled


  return torch._C._cuda_getDeviceCount() > 0


Using cpu device
Wrapping the env in a DummyVecEnv.
critic with  27
critic with  27
critic with  27
critic with  27
tensor([[ 0.0032, -0.0832, -0.0601,  ..., -0.0273,  0.0409,  0.1588],
        [ 0.0448,  0.0076,  0.0344,  ...,  0.0833, -0.0209, -0.1424],
        [-0.1195, -0.0948, -0.0167,  ...,  0.0039, -0.0861,  0.0050],
        ...,
        [ 0.1500,  0.0135,  0.0602,  ..., -0.0077,  0.0866,  0.1876],
        [-0.0891,  0.1908,  0.1741,  ..., -0.0749,  0.0185, -0.0424],
        [ 0.1467, -0.0597, -0.0749,  ...,  0.1047, -0.1867, -0.0322]])
tensor([-1.8562e-01, -3.2392e-04, -3.1671e-02,  8.3155e-02, -1.1387e-01,
         1.8533e-02, -4.9836e-02, -6.2585e-02,  2.8700e-02,  7.5345e-02,
        -7.2845e-02, -1.3017e-01,  1.3956e-01,  1.2397e-01,  8.1937e-02,
        -1.3313e-01,  6.8562e-02,  1.6363e-01, -2.4181e-02, -3.2732e-02,
         1.1360e-01,  8.0258e-02,  1.1380e-01,  1.1968e-01,  9.8742e-02,
         5.2183e-02, -7.3694e-02, -1.6083e-01,  1.3696e-01, -1.5277e-01,
         1.5

1.5
1.25
1.25
1.5
##################################Start Learning##################################
rollout
collect with prior
collect with prior
1.5
collect with prior
1.5
1.25
1.25
-----------------------------
| time/              |      |
|    episodes        | 4    |
|    fps             | 1051 |
|    time_elapsed    | 0    |
|    total timesteps | 804  |
-----------------------------
apply grads
prior/posterior: 2.3333333333333335
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
epoch: 0
meta_reward =  [-281.72733962137175, -297.696504188453]
meta_std =  [24.125481031664226, 59.66280792199512]
rollout
collect with prior
collect with prior
1.25
collect with prior
1.25
1.25
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 913         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
|

apply grads
prior/posterior: 1.1666666666666667
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
epoch: 7
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185]
meta_std =  [24.125481031664226, 59.66280792199512, 53.07108299963847, 55.0406295837304, 53.84866968985381, 54.46754096274957, 54.504433577355684, 52.288181159053266, 54.389042122875125]
rollout
collect with prior
collect with prior
1.25
collect with prior
1.5
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 981         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 0.018183663 |
|    actor_loss      | 28.4        |
|    avg. 

apply grads
prior/posterior: 1.0952380952380953
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
epoch: 13
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211]
meta_std =  [24.125481031664226, 59.66280792199512, 53.07108299963847, 55.0406295837304, 53.84866968985381, 54.46754096274957, 54.504433577355684, 52.288181159053266, 54.389042122875125, 55.483963379005466, 55.11378583486159, 51.468410412436185, 53.53708510052253, 53.602803092407946, 54.43660807572289]
rollout
collect with prior
collect with prior
1.5
collect with prior
1.25
1.5
1.25
-------------------------------------
| time/              |              |
|    episodes        | 4   

1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
epoch: 18
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326]
meta_std =  [24.125481031664226, 59.66280792199512, 53.07108299963847, 55.0406295837304, 53.84866968985381, 54.46754096274957, 54.504433577355684, 52.288181159053266, 54.389042122875125, 55.483963379005466, 55.11378583486159, 51.468410412436185, 53.53708510052253, 53.602803092407946, 54.43660807572289, 54.04874734221543, 51.7878484952522, 55.52736875715725, 54.69932415550188, 54.55835626065249]
rollout
collect with prior
collect with prior
1.25
collect with prior
1.25
1

collect with prior
1.25
1.5
1.25
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 714          |
|    time_elapsed    | 1            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 0.020511229  |
|    actor_loss      | 96.7         |
|    avg. z          | -0.005827334 |
|    avg. z var      | 0.9222399    |
|    critic_loss     | 40.1         |
|    ent_coef        | 0.271        |
|    ent_coef_loss   | -10.2        |
|    learning_rate   | 0.0003       |
|    n_updates       | 4600         |
-------------------------------------
apply grads
prior/posterior: 1.0555555555555556
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.25
epoch: 23
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294

apply grads
prior/posterior: 1.0476190476190477
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
epoch: 27
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646]
meta_std =  [24.125481031664226, 59.66280792199512, 53.07108299963847, 55.0406295837304, 53.84866968985381, 54.46754096274957, 54.504433577355684, 52.288181159053266, 54.389042122875125, 55.483963379005466, 

apply grads
prior/posterior: 1.0416666666666667
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
epoch: 31
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224]
meta_std =  [24.125481031664226, 59.66280792199512, 53.07108299963847, 55.0406295837304, 53.84866968985381, 54.4675409627495

collect with prior
1.5
1.5
1.25
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 771          |
|    time_elapsed    | 1            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 0.13559675   |
|    actor_loss      | 158          |
|    avg. z          | -0.044330414 |
|    avg. z var      | 0.8035263    |
|    critic_loss     | 77.4         |
|    ent_coef        | 0.14         |
|    ent_coef_loss   | -12.6        |
|    learning_rate   | 0.0003       |
|    n_updates       | 7000         |
-------------------------------------
apply grads
prior/posterior: 1.037037037037037
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
epoch: 35
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.2429424

collect with prior
1.5
1.5
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 989         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 0.37166062  |
|    actor_loss      | 170         |
|    avg. z          | -0.13387215 |
|    avg. z var      | 0.65963423  |
|    critic_loss     | 94.7        |
|    ent_coef        | 0.111       |
|    ent_coef_loss   | -12.5       |
|    learning_rate   | 0.0003      |
|    n_updates       | 7800        |
------------------------------------
apply grads
prior/posterior: 1.0333333333333334
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
epoch: 39
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.395470592

apply grads
prior/posterior: 1.0310077519379846
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
epoch: 42
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.714479587725

1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
epoch: 45
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245.90677125075018, -277.3376356282124, -246.380350511082, -2

apply grads
prior/posterior: 1.0272108843537415
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
epoch: 48
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.714479587725

apply grads
prior/posterior: 1.0256410256410255
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
epoch: 51
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, 

collect with prior
1.25
1.25
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 970         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 3.0323005   |
|    actor_loss      | 226         |
|    avg. z          | -0.72117585 |
|    avg. z var      | 0.10847033  |
|    critic_loss     | 144         |
|    ent_coef        | 0.0568      |
|    ent_coef_loss   | -3.47       |
|    learning_rate   | 0.0003      |
|    n_updates       | 10800       |
------------------------------------
apply grads
prior/posterior: 1.0242424242424242
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
epoch: 54
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.3954705

collect with prior
1.25
1.5
1.5
-----------------------------------
| time/              |            |
|    episodes        | 4          |
|    fps             | 966        |
|    time_elapsed    | 0          |
|    total timesteps | 804        |
| train/             |            |
|    KL_loss         | 2.9090986  |
|    actor_loss      | 229        |
|    avg. z          | -0.6502436 |
|    avg. z var      | 0.09739744 |
|    critic_loss     | 125        |
|    ent_coef        | 0.0543     |
|    ent_coef_loss   | 1.39       |
|    learning_rate   | 0.0003     |
|    n_updates       | 11400      |
-----------------------------------
apply grads
prior/posterior: 1.0229885057471264
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
epoch: 57
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7

collect with prior
1.5
1.25
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 975         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 2.75224     |
|    actor_loss      | 251         |
|    avg. z          | -0.72389823 |
|    avg. z var      | 0.07676013  |
|    critic_loss     | 121         |
|    ent_coef        | 0.0565      |
|    ent_coef_loss   | -0.205      |
|    learning_rate   | 0.0003      |
|    n_updates       | 12000       |
------------------------------------
apply grads
prior/posterior: 1.0218579234972678
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
epoch: 60
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.3954705

collect with prior
1.25
1.5
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 981         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 2.716373    |
|    actor_loss      | 258         |
|    avg. z          | -0.74507093 |
|    avg. z var      | 0.06352366  |
|    critic_loss     | 135         |
|    ent_coef        | 0.0501      |
|    ent_coef_loss   | -4.57       |
|    learning_rate   | 0.0003      |
|    n_updates       | 12600       |
------------------------------------
apply grads
prior/posterior: 1.0208333333333333
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
epoch: 63
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.395470

collect with prior
1.5
1.5
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 987         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 3.015393    |
|    actor_loss      | 273         |
|    avg. z          | -0.43544173 |
|    avg. z var      | 0.06378343  |
|    critic_loss     | 96.9        |
|    ent_coef        | 0.042       |
|    ent_coef_loss   | -7.2        |
|    learning_rate   | 0.0003      |
|    n_updates       | 13200       |
------------------------------------
apply grads
prior/posterior: 1.0199004975124377
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
epoch: 66
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.3954

collect with prior
1.5
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 969         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 2.827605    |
|    actor_loss      | 280         |
|    avg. z          | -0.95298856 |
|    avg. z var      | 0.07169857  |
|    critic_loss     | 124         |
|    ent_coef        | 0.034       |
|    ent_coef_loss   | -6.3        |
|    learning_rate   | 0.0003      |
|    n_updates       | 13800       |
------------------------------------
apply grads
prior/posterior: 1.019047619047619
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
epoch: 69
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.3

1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
epoch: 71
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245.90677125075018, -277.3376356282124, -246.380

collect with prior
1.25
1.5
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 967         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.430148    |
|    actor_loss      | 301         |
|    avg. z          | -0.63498855 |
|    avg. z var      | 0.047406092 |
|    critic_loss     | 175         |
|    ent_coef        | 0.0339      |
|    ent_coef_loss   | 4.66        |
|    learning_rate   | 0.0003      |
|    n_updates       | 14800       |
------------------------------------
apply grads
prior/posterior: 1.0177777777777777
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
epoch: 74
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547

apply grads
prior/posterior: 1.0173160173160174
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
epoch: 76
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.71447958

1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
epoch: 78
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245.90677125075018, -277.3376356282124, -246.380350511082, -264.7406943023678, -186.94594224528535, -281.677582048449

1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
epoch: 80
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245.90677125075018, -277.3376356282124, -246.380350511082, -264.7406943023678, -186.94594224528

apply grads
prior/posterior: 1.0160642570281124
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
epoch: 82
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.71447958772

apply grads
prior/posterior: 1.0156862745098039
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
epoch: 84
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.71447958772

apply grads
prior/posterior: 1.0153256704980842
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
epoch: 86
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.714479587

collect with prior
1.5
1.5
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 951         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.2078943   |
|    actor_loss      | 343         |
|    avg. z          | -1.1376191  |
|    avg. z var      | 0.030606251 |
|    critic_loss     | 174         |
|    ent_coef        | 0.041       |
|    ent_coef_loss   | 9.28        |
|    learning_rate   | 0.0003      |
|    n_updates       | 17600       |
------------------------------------
apply grads
prior/posterior: 1.0149812734082397
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
epoch: 88
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.3954705

collect with prior
1.25
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 768         |
|    time_elapsed    | 1           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 3.2273383   |
|    actor_loss      | 342         |
|    avg. z          | -1.0032964  |
|    avg. z var      | 0.028016891 |
|    critic_loss     | 242         |
|    ent_coef        | 0.0429      |
|    ent_coef_loss   | -5.55       |
|    learning_rate   | 0.0003      |
|    n_updates       | 18000       |
------------------------------------
apply grads
prior/posterior: 1.0146520146520146
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
epoch: 90
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.395

collect with prior
1.25
1.25
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 978         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.4006987   |
|    actor_loss      | 358         |
|    avg. z          | -1.0704718  |
|    avg. z var      | 0.021814937 |
|    critic_loss     | 198         |
|    ent_coef        | 0.0411      |
|    ent_coef_loss   | -8.18       |
|    learning_rate   | 0.0003      |
|    n_updates       | 18400       |
------------------------------------
apply grads
prior/posterior: 1.014336917562724
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
epoch: 92
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.395470

collect with prior
1.25
1.25
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 960         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 3.913455    |
|    actor_loss      | 361         |
|    avg. z          | -0.7600064  |
|    avg. z var      | 0.017894037 |
|    critic_loss     | 147         |
|    ent_coef        | 0.0345      |
|    ent_coef_loss   | -7.74       |
|    learning_rate   | 0.0003      |
|    n_updates       | 18800       |
------------------------------------
apply grads
prior/posterior: 1.0140350877192983
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
epoch: 94
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39

collect with prior
1.5
1.25
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 963         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.458928    |
|    actor_loss      | 358         |
|    avg. z          | -1.1138421  |
|    avg. z var      | 0.029985903 |
|    critic_loss     | 230         |
|    ent_coef        | 0.0308      |
|    ent_coef_loss   | -3.07       |
|    learning_rate   | 0.0003      |
|    n_updates       | 19200       |
------------------------------------
apply grads
prior/posterior: 1.013745704467354
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
epoch: 96
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547

collect with prior
1.5
1.5
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 974         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.0924473   |
|    actor_loss      | 373         |
|    avg. z          | -1.334641   |
|    avg. z var      | 0.040363826 |
|    critic_loss     | 221         |
|    ent_coef        | 0.0298      |
|    ent_coef_loss   | -1.41       |
|    learning_rate   | 0.0003      |
|    n_updates       | 19600       |
------------------------------------
apply grads
prior/posterior: 1.0134680134680134
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
epoch: 98
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059

collect with prior
1.25
1.5
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 975         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.3913975   |
|    actor_loss      | 385         |
|    avg. z          | -1.429892   |
|    avg. z var      | 0.029460203 |
|    critic_loss     | 131         |
|    ent_coef        | 0.0276      |
|    ent_coef_loss   | -1.81       |
|    learning_rate   | 0.0003      |
|    n_updates       | 20000       |
------------------------------------
apply grads
prior/posterior: 1.0132013201320131
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
epoch: 100
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.3954

collect with prior
1.5
1.25
1.5
-----------------------------------
| time/              |            |
|    episodes        | 4          |
|    fps             | 961        |
|    time_elapsed    | 0          |
|    total timesteps | 804        |
| train/             |            |
|    KL_loss         | 5.642308   |
|    actor_loss      | 381        |
|    avg. z          | -1.6047542 |
|    avg. z var      | 0.03615104 |
|    critic_loss     | 243        |
|    ent_coef        | 0.0281     |
|    ent_coef_loss   | -0.262     |
|    learning_rate   | 0.0003     |
|    n_updates       | 20400      |
-----------------------------------
apply grads
prior/posterior: 1.0129449838187703
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
epoch: 102
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.728983

collect with prior
1.25
1.5
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 984         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.529488    |
|    actor_loss      | 382         |
|    avg. z          | -1.0635127  |
|    avg. z var      | 0.023668053 |
|    critic_loss     | 194         |
|    ent_coef        | 0.0282      |
|    ent_coef_loss   | -4.04       |
|    learning_rate   | 0.0003      |
|    n_updates       | 20800       |
------------------------------------
apply grads
prior/posterior: 1.0126984126984127
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
epoch: 104
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.395470

collect with prior
1.25
1.5
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 961         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.505736    |
|    actor_loss      | 384         |
|    avg. z          | -1.1829301  |
|    avg. z var      | 0.028330721 |
|    critic_loss     | 184         |
|    ent_coef        | 0.0242      |
|    ent_coef_loss   | -8.51       |
|    learning_rate   | 0.0003      |
|    n_updates       | 21200       |
------------------------------------
apply grads
prior/posterior: 1.0124610591900312
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
epoch: 106
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.

collect with prior
1.5
1.5
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 973         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.4015636   |
|    actor_loss      | 390         |
|    avg. z          | -0.9708408  |
|    avg. z var      | 0.013140213 |
|    critic_loss     | 262         |
|    ent_coef        | 0.0229      |
|    ent_coef_loss   | 1.28        |
|    learning_rate   | 0.0003      |
|    n_updates       | 21600       |
------------------------------------
apply grads
prior/posterior: 1.0122324159021407
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
epoch: 108
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.3954

collect with prior
1.5
1.25
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 965         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 3.5221632   |
|    actor_loss      | 379         |
|    avg. z          | -0.5057603  |
|    avg. z var      | 0.032752067 |
|    critic_loss     | 318         |
|    ent_coef        | 0.024       |
|    ent_coef_loss   | 6.92        |
|    learning_rate   | 0.0003      |
|    n_updates       | 22000       |
------------------------------------
apply grads
prior/posterior: 1.012012012012012
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
epoch: 110
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547

collect with prior
1.5
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 847         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.4980593   |
|    actor_loss      | 383         |
|    avg. z          | -0.9901955  |
|    avg. z var      | 0.034123585 |
|    critic_loss     | 306         |
|    ent_coef        | 0.0268      |
|    ent_coef_loss   | 5.39        |
|    learning_rate   | 0.0003      |
|    n_updates       | 22400       |
------------------------------------
apply grads
prior/posterior: 1.0117994100294985
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
epoch: 112
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.

collect with prior
1.5
1.5
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 983         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.5129004   |
|    actor_loss      | 402         |
|    avg. z          | -0.9392653  |
|    avg. z var      | 0.012086902 |
|    critic_loss     | 273         |
|    ent_coef        | 0.0317      |
|    ent_coef_loss   | 8.84        |
|    learning_rate   | 0.0003      |
|    n_updates       | 22800       |
------------------------------------
apply grads
prior/posterior: 1.0115942028985507
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
epoch: 114
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.3

collect with prior
1.5
1.5
1.25
-----------------------------------
| time/              |            |
|    episodes        | 4          |
|    fps             | 979        |
|    time_elapsed    | 0          |
|    total timesteps | 804        |
| train/             |            |
|    KL_loss         | 4.9064665  |
|    actor_loss      | 399        |
|    avg. z          | -1.10718   |
|    avg. z var      | 0.01952851 |
|    critic_loss     | 252        |
|    ent_coef        | 0.0375     |
|    ent_coef_loss   | 9.92       |
|    learning_rate   | 0.0003     |
|    n_updates       | 23200      |
-----------------------------------
apply grads
prior/posterior: 1.0113960113960114
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
epoch: 116
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7

collect with prior
1.25
1.5
1.25
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 961          |
|    time_elapsed    | 0            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 6.804654     |
|    actor_loss      | 429          |
|    avg. z          | -1.6425334   |
|    avg. z var      | 0.0129136825 |
|    critic_loss     | 351          |
|    ent_coef        | 0.0442       |
|    ent_coef_loss   | 7.82         |
|    learning_rate   | 0.0003       |
|    n_updates       | 23600        |
-------------------------------------
apply grads
prior/posterior: 1.011204481792717
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
epoch: 118
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.2429424185243

collect with prior
1.5
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 961         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 3.5771656   |
|    actor_loss      | 404         |
|    avg. z          | -0.865773   |
|    avg. z var      | 0.019723536 |
|    critic_loss     | 279         |
|    ent_coef        | 0.0467      |
|    ent_coef_loss   | 3.44        |
|    learning_rate   | 0.0003      |
|    n_updates       | 24000       |
------------------------------------
apply grads
prior/posterior: 1.0110192837465564
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
epoch: 120
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547

collect with prior
1.5
1.5
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 978         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.9343214   |
|    actor_loss      | 424         |
|    avg. z          | -1.5204422  |
|    avg. z var      | 0.017641507 |
|    critic_loss     | 265         |
|    ent_coef        | 0.0529      |
|    ent_coef_loss   | 4.72        |
|    learning_rate   | 0.0003      |
|    n_updates       | 24400       |
------------------------------------
apply grads
prior/posterior: 1.010840108401084
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
epoch: 122
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.3954705

collect with prior
1.5
1.5
1.5
-----------------------------------
| time/              |            |
|    episodes        | 4          |
|    fps             | 962        |
|    time_elapsed    | 0          |
|    total timesteps | 804        |
| train/             |            |
|    KL_loss         | 4.560647   |
|    actor_loss      | 410        |
|    avg. z          | -0.9823355 |
|    avg. z var      | 0.00928383 |
|    critic_loss     | 414        |
|    ent_coef        | 0.0523     |
|    ent_coef_loss   | -6.25      |
|    learning_rate   | 0.0003     |
|    n_updates       | 24800      |
-----------------------------------
apply grads
prior/posterior: 1.0106666666666666
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
epoch: 124
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7

collect with prior
1.5
1.5
1.25
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 956          |
|    time_elapsed    | 0            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 4.582038     |
|    actor_loss      | 420          |
|    avg. z          | -1.1391265   |
|    avg. z var      | 0.0153949205 |
|    critic_loss     | 542          |
|    ent_coef        | 0.0515       |
|    ent_coef_loss   | 6.69         |
|    learning_rate   | 0.0003       |
|    n_updates       | 25200        |
-------------------------------------
apply grads
prior/posterior: 1.010498687664042
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
epoch: 126
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852

collect with prior
1.25
1.25
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 961         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 5.596793    |
|    actor_loss      | 446         |
|    avg. z          | -1.524957   |
|    avg. z var      | 0.023643306 |
|    critic_loss     | 291         |
|    ent_coef        | 0.0536      |
|    ent_coef_loss   | 1.93        |
|    learning_rate   | 0.0003      |
|    n_updates       | 25600       |
------------------------------------
apply grads
prior/posterior: 1.0103359173126616
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
epoch: 128
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39

collect with prior
1.25
1.5
1.5
-----------------------------------
| time/              |            |
|    episodes        | 4          |
|    fps             | 968        |
|    time_elapsed    | 0          |
|    total timesteps | 804        |
| train/             |            |
|    KL_loss         | 3.7714143  |
|    actor_loss      | 413        |
|    avg. z          | -0.652953  |
|    avg. z var      | 0.02054494 |
|    critic_loss     | 306        |
|    ent_coef        | 0.0574     |
|    ent_coef_loss   | 3.04       |
|    learning_rate   | 0.0003     |
|    n_updates       | 26000      |
-----------------------------------
apply grads
prior/posterior: 1.0101781170483461
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
epoch: 130
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -2

collect with prior
1.25
1.5
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 974         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.3907647   |
|    actor_loss      | 413         |
|    avg. z          | -1.1784778  |
|    avg. z var      | 0.029137323 |
|    critic_loss     | 324         |
|    ent_coef        | 0.058       |
|    ent_coef_loss   | -4.28       |
|    learning_rate   | 0.0003      |
|    n_updates       | 26400       |
------------------------------------
apply grads
prior/posterior: 1.0100250626566416
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
epoch: 132
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.3

collect with prior
1.5
1.5
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 947         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 5.37318     |
|    actor_loss      | 440         |
|    avg. z          | -1.4020071  |
|    avg. z var      | 0.022627005 |
|    critic_loss     | 281         |
|    ent_coef        | 0.0517      |
|    ent_coef_loss   | -1.42       |
|    learning_rate   | 0.0003      |
|    n_updates       | 26800       |
------------------------------------
apply grads
prior/posterior: 1.0098765432098766
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.25
epoch: 134
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547

collect with prior
1.5
1.25
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 962         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.834833    |
|    actor_loss      | 446         |
|    avg. z          | -1.3650949  |
|    avg. z var      | 0.020624423 |
|    critic_loss     | 370         |
|    ent_coef        | 0.0519      |
|    ent_coef_loss   | 2.81        |
|    learning_rate   | 0.0003      |
|    n_updates       | 27200       |
------------------------------------
apply grads
prior/posterior: 1.0097323600973236
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
epoch: 136
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.395470

collect with prior
1.25
1.25
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 965         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 5.1830726   |
|    actor_loss      | 439         |
|    avg. z          | -1.2928395  |
|    avg. z var      | 0.013756036 |
|    critic_loss     | 385         |
|    ent_coef        | 0.0493      |
|    ent_coef_loss   | -5.39       |
|    learning_rate   | 0.0003      |
|    n_updates       | 27600       |
------------------------------------
apply grads
prior/posterior: 1.0095923261390887
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
epoch: 138
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.395

collect with prior
1.25
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 975         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.380814    |
|    actor_loss      | 426         |
|    avg. z          | -0.97654146 |
|    avg. z var      | 0.02454514  |
|    critic_loss     | 371         |
|    ent_coef        | 0.0472      |
|    ent_coef_loss   | 4.41        |
|    learning_rate   | 0.0003      |
|    n_updates       | 28000       |
------------------------------------
apply grads
prior/posterior: 1.0094562647754137
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
epoch: 140
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.

collect with prior
1.5
1.5
1.25
-----------------------------------
| time/              |            |
|    episodes        | 4          |
|    fps             | 985        |
|    time_elapsed    | 0          |
|    total timesteps | 804        |
| train/             |            |
|    KL_loss         | 5.6993737  |
|    actor_loss      | 440        |
|    avg. z          | -1.2016867 |
|    avg. z var      | 0.01923971 |
|    critic_loss     | 328        |
|    ent_coef        | 0.0451     |
|    ent_coef_loss   | -6.33      |
|    learning_rate   | 0.0003     |
|    n_updates       | 28400      |
-----------------------------------
apply grads
prior/posterior: 1.0093240093240092
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
epoch: 142
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.72

collect with prior
1.25
1.5
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 976         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.2219453   |
|    actor_loss      | 425         |
|    avg. z          | -1.1440545  |
|    avg. z var      | 0.029530287 |
|    critic_loss     | 293         |
|    ent_coef        | 0.0389      |
|    ent_coef_loss   | -5.07       |
|    learning_rate   | 0.0003      |
|    n_updates       | 28800       |
------------------------------------
apply grads
prior/posterior: 1.0091954022988505
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
epoch: 144
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39

collect with prior
1.25
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 973         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 5.230321    |
|    actor_loss      | 429         |
|    avg. z          | -0.64232796 |
|    avg. z var      | 0.02698706  |
|    critic_loss     | 316         |
|    ent_coef        | 0.0373      |
|    ent_coef_loss   | 0.142       |
|    learning_rate   | 0.0003      |
|    n_updates       | 29200       |
------------------------------------
apply grads
prior/posterior: 1.0090702947845804
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.25
epoch: 146
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, 

collect with prior
1.5
1.25
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 958         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.942524    |
|    actor_loss      | 438         |
|    avg. z          | -1.0072247  |
|    avg. z var      | 0.010259912 |
|    critic_loss     | 456         |
|    ent_coef        | 0.0341      |
|    ent_coef_loss   | 0.711       |
|    learning_rate   | 0.0003      |
|    n_updates       | 29600       |
------------------------------------
apply grads
prior/posterior: 1.0089485458612975
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
epoch: 148
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39

collect with prior
1.5
1.25
1.5
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 981          |
|    time_elapsed    | 0            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 5.6918755    |
|    actor_loss      | 446          |
|    avg. z          | -0.77536494  |
|    avg. z var      | 0.0032290295 |
|    critic_loss     | 339          |
|    ent_coef        | 0.0387       |
|    ent_coef_loss   | 5.18         |
|    learning_rate   | 0.0003       |
|    n_updates       | 30000        |
-------------------------------------
apply grads
prior/posterior: 1.0088300220750552
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
epoch: 150
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.2429424185

collect with prior
1.25
1.5
1.5
-----------------------------------
| time/              |            |
|    episodes        | 4          |
|    fps             | 963        |
|    time_elapsed    | 0          |
|    total timesteps | 804        |
| train/             |            |
|    KL_loss         | 4.918146   |
|    actor_loss      | 453        |
|    avg. z          | -1.0285946 |
|    avg. z var      | 0.00711617 |
|    critic_loss     | 373        |
|    ent_coef        | 0.0441     |
|    ent_coef_loss   | 14.6       |
|    learning_rate   | 0.0003     |
|    n_updates       | 30400      |
-----------------------------------
apply grads
prior/posterior: 1.008714596949891
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
epoch: 152
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -

collect with prior
1.5
1.5
1.5
-----------------------------------
| time/              |            |
|    episodes        | 4          |
|    fps             | 972        |
|    time_elapsed    | 0          |
|    total timesteps | 804        |
| train/             |            |
|    KL_loss         | 6.3259583  |
|    actor_loss      | 460        |
|    avg. z          | -1.6469617 |
|    avg. z var      | 0.02828855 |
|    critic_loss     | 252        |
|    ent_coef        | 0.0495     |
|    ent_coef_loss   | 0.603      |
|    learning_rate   | 0.0003     |
|    n_updates       | 30800      |
-----------------------------------
apply grads
prior/posterior: 1.0086021505376344
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
epoch: 154
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832

collect with prior
1.25
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 988         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 7.0122337   |
|    actor_loss      | 456         |
|    avg. z          | -1.4939506  |
|    avg. z var      | 0.012302521 |
|    critic_loss     | 548         |
|    ent_coef        | 0.0503      |
|    ent_coef_loss   | 4.37        |
|    learning_rate   | 0.0003      |
|    n_updates       | 31200       |
------------------------------------
apply grads
prior/posterior: 1.0084925690021231
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
epoch: 156
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39

collect with prior
1.25
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 971         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 6.071831    |
|    actor_loss      | 456         |
|    avg. z          | -1.3582877  |
|    avg. z var      | 0.005395701 |
|    critic_loss     | 357         |
|    ent_coef        | 0.0538      |
|    ent_coef_loss   | -2.44       |
|    learning_rate   | 0.0003      |
|    n_updates       | 31600       |
------------------------------------
apply grads
prior/posterior: 1.0083857442348008
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
epoch: 158
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -26

collect with prior
1.25
1.5
1.5
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 977          |
|    time_elapsed    | 0            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 5.373699     |
|    actor_loss      | 445          |
|    avg. z          | -0.71425486  |
|    avg. z var      | 0.0038723985 |
|    critic_loss     | 244          |
|    ent_coef        | 0.0535       |
|    ent_coef_loss   | -5.28        |
|    learning_rate   | 0.0003       |
|    n_updates       | 32000        |
-------------------------------------
apply grads
prior/posterior: 1.0082815734989647
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
epoch: 160
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.2429

1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
epoch: 161
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245.90677125075018, -277.3376356282124, -246.380350511082, -264.7406943023678, -186.94594224528535, -281.6775820484499, -417.991028932099, -21

collect with prior
1.25
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 969         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.450609    |
|    actor_loss      | 458         |
|    avg. z          | -0.9201894  |
|    avg. z var      | 0.006951015 |
|    critic_loss     | 397         |
|    ent_coef        | 0.0472      |
|    ent_coef_loss   | -3.23       |
|    learning_rate   | 0.0003      |
|    n_updates       | 32600       |
------------------------------------
apply grads
prior/posterior: 1.008130081300813
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
epoch: 163
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547

1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
epoch: 164
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245.90677125075018, -277.3376356282124, -246.3803

collect with prior
1.25
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 988         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 6.239516    |
|    actor_loss      | 443         |
|    avg. z          | -1.3309011  |
|    avg. z var      | 0.011326418 |
|    critic_loss     | 477         |
|    ent_coef        | 0.0517      |
|    ent_coef_loss   | 13.4        |
|    learning_rate   | 0.0003      |
|    n_updates       | 33200       |
------------------------------------
apply grads
prior/posterior: 1.0079840319361277
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
epoch: 166
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.395

apply grads
prior/posterior: 1.007936507936508
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.5
epoch: 167
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.714479587725

collect with prior
1.5
1.5
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 979         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.3659186   |
|    actor_loss      | 438         |
|    avg. z          | -0.6055891  |
|    avg. z var      | 0.015802799 |
|    critic_loss     | 362         |
|    ent_coef        | 0.0635      |
|    ent_coef_loss   | 10.3        |
|    learning_rate   | 0.0003      |
|    n_updates       | 33800       |
------------------------------------
apply grads
prior/posterior: 1.007843137254902
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
epoch: 169
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39

apply grads
prior/posterior: 1.0077972709551657
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
epoch: 170
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.714479587

collect with prior
1.25
1.25
1.25
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 974         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 6.674166    |
|    actor_loss      | 480         |
|    avg. z          | -1.3306668  |
|    avg. z var      | 0.005747438 |
|    critic_loss     | 287         |
|    ent_coef        | 0.0661      |
|    ent_coef_loss   | 2.36        |
|    learning_rate   | 0.0003      |
|    n_updates       | 34400       |
------------------------------------
apply grads
prior/posterior: 1.0077071290944124
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
epoch: 172
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.3954

apply grads
prior/posterior: 1.0076628352490422
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.25
1.5
epoch: 173
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877

collect with prior
1.25
1.5
1.5
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 958          |
|    time_elapsed    | 0            |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 5.0466824    |
|    actor_loss      | 454          |
|    avg. z          | -0.9863373   |
|    avg. z var      | 0.0061867293 |
|    critic_loss     | 350          |
|    ent_coef        | 0.0734       |
|    ent_coef_loss   | 5.12         |
|    learning_rate   | 0.0003       |
|    n_updates       | 35000        |
-------------------------------------
apply grads
prior/posterior: 1.0075757575757576
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
epoch: 175
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.242942418

apply grads
prior/posterior: 1.0075329566854991
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
epoch: 176
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245

collect with prior
1.5
1.5
1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 968         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 4.7949963   |
|    actor_loss      | 465         |
|    avg. z          | -1.046766   |
|    avg. z var      | 0.009463252 |
|    critic_loss     | 684         |
|    ent_coef        | 0.0829      |
|    ent_coef_loss   | 1.42        |
|    learning_rate   | 0.0003      |
|    n_updates       | 35600       |
------------------------------------
apply grads
prior/posterior: 1.0074487895716946
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
epoch: 178
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.395

apply grads
prior/posterior: 1.0074074074074073
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
epoch: 179
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -2

1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
epoch: 180
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245.90677125075018, -277.3376356282124, -246.380350511082, -264.7406943023678, -186.945942245

1.25
1.25
1.5
1.25
1.5
1.25
1.5
epoch: 181
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245.90677125075018, -277.3376356282124, -246.380350511082, -264.7406943023678, -186.94594224528535, -281.6775820484499, -417.991028932099, -211.016

1.25
1.25
1.5
1.5
1.25
1.25
1.25
epoch: 182
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245.90677125075018, -277.3376356282124, -246.380350511082, -264.7406943023678, -186.94594224528535, -281.6775820484499, -417.991028932099, -211.01

1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
epoch: 183
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245.90677125075018, -277.3376356282124, -246.380350511082, -264.7406943023678, -186.94594224528535, -281.6775820484499, -417.991

1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
epoch: 184
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245.90677125075018, -277.3376356282124, -246.380350511082, -264.74

apply grads
prior/posterior: 1.007168458781362
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
epoch: 185
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255

apply grads
prior/posterior: 1.0071301247771836
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
epoch: 186
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.714479587725

apply grads
prior/posterior: 1.0070921985815602
1.5
1.25
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
epoch: 187
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.714479587

apply grads
prior/posterior: 1.0070546737213404
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.5
epoch: 188
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -245

apply grads
prior/posterior: 1.0070175438596491
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
epoch: 189
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.71447958772

apply grads
prior/posterior: 1.006980802792321
1.25
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
epoch: 190
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255

apply grads
prior/posterior: 1.0069444444444444
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
epoch: 191
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.71447958772

apply grads
prior/posterior: 1.0069084628670122
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.25
epoch: 192
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, 

apply grads
prior/posterior: 1.006872852233677
1.5
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
1.5
1.5
epoch: 193
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, 

apply grads
prior/posterior: 1.0068376068376068
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.25
epoch: 194
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, 

apply grads
prior/posterior: 1.0068027210884354
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.5
1.25
1.25
1.5
epoch: 195
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255,

apply grads
prior/posterior: 1.0067681895093064
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
1.25
1.5
1.5
1.5
1.5
1.25
1.5
epoch: 196
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255,

apply grads
prior/posterior: 1.0067340067340067
1.25
1.25
1.5
1.5
1.5
1.25
1.25
1.5
1.5
1.5
1.5
1.5
1.5
1.5
1.25
1.5
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.5
1.25
1.25
1.5
1.5
1.25
1.25
epoch: 197
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.7144795877255, -

apply grads
prior/posterior: 1.0067001675041876
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.5
1.25
1.5
1.25
1.5
1.25
1.5
1.25
1.25
1.25
1.25
1.25
1.5
1.5
1.5
1.25
1.5
1.5
1.5
1.25
1.25
1.25
1.5
epoch: 198
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.7289832326335, -266.940459063185, -273.9057017716561, -274.2678100537612, -248.65272555269567, -258.81353352094004, -257.2966821488289, -265.420370833211, -264.50238833299585, -255.32663841422007, -271.26639230516673, -269.6049043430677, -270.6974893169326, -269.4970802909729, -263.17817855923846, -263.4491382029321, -250.9048839199915, -255.32414322387, -259.09662491312315, -258.1390870540691, -244.13361039514317, -268.87536795688646, -253.33670568535683, -249.79840162964143, -258.98901479200913, -265.50113490322224, -280.82552109382397, -244.27294483608335, -258.9527018881678, -265.64536114310454, -282.6273251258732, -257.714479587725

1.5
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 982         |
|    time_elapsed    | 0           |
|    total timesteps | 804         |
| train/             |             |
|    KL_loss         | 6.4738245   |
|    actor_loss      | 467         |
|    avg. z          | -1.2029169  |
|    avg. z var      | 0.011806675 |
|    critic_loss     | 666         |
|    ent_coef        | 0.0923      |
|    ent_coef_loss   | -0.585      |
|    learning_rate   | 0.0003      |
|    n_updates       | 39800       |
------------------------------------
apply grads
prior/posterior: 1.0066666666666666
1.25
1.5
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.25
1.5
1.25
1.5
1.5
1.5
1.5
1.25
1.25
1.25
1.5
1.25
1.5
1.25
1.5
epoch: 199
meta_reward =  [-281.72733962137175, -297.696504188453, -259.55673565166506, -268.864582627905, -265.484702409847, -267.24294241852436, -265.39547059289833, -257.72898323263

In [5]:
# since last time the sampling in evaluation and the sampling depth for context in train

In [6]:
# Hard-coded list of 100 floats (the `len(a)` cell that follows reports 100).
# Every value visible here lies in [0, 3).
# NOTE(review): these look like sampled task parameters (e.g. target
# velocities) pasted from an earlier run — TODO confirm their origin.
a = [0.7860740250467451, 1.3779506616437, 1.5551784617926114, 0.7858287766954339, 2.928255854632301, 2.1984436580714473, 0.3458226800494484, 1.1588252059030784, 1.885503538619135, 1.3296746059353843, 2.3686750260717098, 2.38235572737476, 2.1685728538042, 2.711781597991056, 1.999393957709451, 1.8307119965196041, 1.260624986955272, 0.2671352016074815, 1.402548298157229, 2.4971121092039477, 2.279859532934837, 2.839511756498454, 0.37517377900679827, 0.5634252162792469, 1.1076871867784575, 0.3762089882646701, 0.9730737106816765, 1.59509821076111, 2.73336720468094, 1.7527743843309642, 1.248311818712994, 1.4736872317976357, 1.3468935565348823, 1.1823408005585798, 2.107571159141275, 2.731734949135858, 0.37990433907251275, 0.7641724093376423, 1.512021117484312, 1.0545722993955051, 1.6782903704242598, 2.8169478016378617, 1.2892761911000037, 1.4989447841160373, 2.992167429662661, 0.47605191646339684, 0.8343795584830792, 0.9630016215605014, 0.5557528352161346, 0.008703090506129718, 0.48863259980930185, 0.5975857056152952, 0.7963510149605818, 0.864501443436107, 0.047374356635443315, 1.2762439954335498, 2.010656574480702, 2.9203781981030765, 2.100895260148338, 2.3680082550723363, 1.6935569384236153, 2.1718436939592927, 2.728960574484791, 0.7084476270060592, 1.480194734343588, 1.022807914959746, 1.9006458178251902, 1.532128187898675, 1.598488390876416, 1.403537916973332, 2.979915119672488, 2.2720545866630646, 0.5357060314905813, 1.474444589494259, 0.48635227727992836, 1.5568759511574917, 2.2805153214175724, 2.0142083586378923, 2.3757763016921394, 2.8720776710137823, 2.195070206002873, 2.079658332603392, 2.9186169896064182, 2.1386625004738615, 0.17603797674033772, 0.5324039546451034, 1.124502553740213, 1.0493076380999455, 2.379069097887579, 0.5713365770889663, 2.9506458154309234, 2.6917428928705496, 0.7125322952456615, 1.0837847172522923, 1.2394239042739765, 1.531412840477841, 1.188193847652982, 2.5079036400976444, 0.043133571081973576, 0.03269028711044775]

In [7]:
# Display how many entries the list `a` currently holds.
len(a)

100

In [8]:
# Rebinds `a` to a longer hard-coded list of floats; the previous contents of
# `a` are discarded.
# NOTE(review): this appears to be the earlier 100-value list with additional
# values interleaved — TODO confirm, and consider a descriptive name so the
# two versions cannot be confused when cells are run out of order.
a =[0.7860740250467451, 1.3779506616437, 1.5551784617926114, 0.7858287766954339, 2.928255854632301, 2.1984436580714473, 0.3458226800494484, 1.1588252059030784, 1.885503538619135, 1.3296746059353843, 2.3686750260717098, 2.38235572737476, 2.1685728538042, 2.711781597991056, 2.9784953809599912, 2.096870367728471, 2.04652104928077, 1.999393957709451, 1.8307119965196041, 1.4436558931536259, 1.260624986955272, 1.8408117163039968, 1.0474247818307383, 0.2893499571670589, 0.2671352016074815, 1.402548298157229, 2.4971121092039477, 2.279859532934837, 2.839511756498454, 0.37517377900679827, 0.5634252162792469, 1.1076871867784575, 2.7450762138519322, 2.176699864999294, 0.3762089882646701, 0.9730737106816765, 1.59509821076111, 2.73336720468094, 2.050959316523306, 2.809525172141024, 1.7527743843309642, 1.248311818712994, 1.4736872317976357, 1.3468935565348823, 1.1823408005585798, 2.107571159141275, 2.731734949135858, 0.37990433907251275, 0.7641724093376423, 1.512021117484312, 1.0545722993955051, 1.6782903704242598, 2.8169478016378617, 1.2892761911000037, 0.8861106117834646, 1.4989447841160373, 2.992167429662661, 0.7015287662209706, 0.11168961358780638, 0.47605191646339684, 0.8343795584830792, 0.9630016215605014, 0.5557528352161346, 0.008703090506129718, 0.48863259980930185, 0.5975857056152952, 0.7963510149605818, 0.864501443436107, 0.047374356635443315, 1.2762439954335498, 2.010656574480702, 2.9203781981030765, 2.100895260148338, 2.3680082550723363, 1.6935569384236153, 2.1718436939592927, 2.728960574484791, 0.7084476270060592, 1.480194734343588, 0.09577530004016699, 1.022807914959746, 1.9006458178251902, 1.532128187898675, 1.598488390876416, 1.454276543853031, 1.403537916973332, 1.071058434727445, 2.979915119672488, 2.2720545866630646, 0.5357060314905813, 1.474444589494259, 0.48635227727992836, 1.5568759511574917, 2.2805153214175724, 2.0142083586378923, 2.3757763016921394, 2.2093259883723895, 0.7794378763219836, 2.8720776710137823, 2.195070206002873, 2.079658332603392, 
2.9186169896064182, 2.1386625004738615, 2.802629411145565, 0.17603797674033772, 0.5324039546451034, 1.124502553740213, 1.0493076380999455, 1.744195890973208, 0.5583223214918457, 2.379069097887579, 0.08306914545160993, 0.5713365770889663, 1.2224346928609924, 1.1705025304184877, 0.14470529568442736, 2.9506458154309234, 2.6917428928705496, 2.5617488625541105, 2.317384380319182, 0.7125322952456615, 0.3765232265537465, 1.0837847172522923, 1.2394239042739765, 1.531412840477841, 1.188193847652982, 2.5079036400976444, 0.043133571081973576, 0.03269028711044775, 0.7144484446282852]

In [9]:
# How many whole groups of 201 fit into 6029 (evaluates to 29).
# Floor division keeps the computation in integer arithmetic instead of
# building a float quotient and truncating it.
# NOTE(review): 201 presumably is the per-episode step count (the training log
# reports 804 timesteps over 4 episodes) and 6029 a total step count — TODO
# confirm, and consider naming both values.
6029 // 201

29