In [1]:
import gym

from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3 import mSAC
from stable_baselines3.common.evaluation import evaluate_policy, evaluate_meta_policy

import numpy as np
from gym import spaces

##pyfly stuff
from pyfly.pyfly import PyFly
from pyfly.pid_controller import PIDController
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d.axes3d as p3
import matplotlib.animation as animation

In [2]:
#export 
class simrecorder:
    '''
    Recorder for one simulation run plus the helpers needed to animate it.

    Holds per-step buffers (res_n, res_e, res_d, roll, pitch, yaw) of length
    `simduration`, a tqdm progress bar for the simulation, and `plot()`, which
    renders the buffers together with a 3D drone model into an mp4 file.
    '''

    class prog():
        '''
        Progressbar class. Used for displaying the progress during simulation and rendering. Requires tqdm.
        '''
        def __init__(self, max_it, name, unit, pos):
            # Imported lazily so tqdm is only needed once a bar is actually created.
            from tqdm.notebook import tqdm
            self.bar = tqdm(total=max_it, position=pos, leave=True, unit=unit, desc=name)

        def update(self, val):
            '''Advance the bar by `val` iterations.'''
            self.bar.update(val)

        def disable(self):
            '''Set the `disable` flag on the underlying tqdm bar.'''
            self.bar.disable = True

    def __init__(self, simduration):
        '''
        Initialize main class, used for recording, plotting

        simduration: number of simulation steps (in sim.dt steps); also the
                     length of every result buffer.
        '''
        self.simduration = simduration # In sim.dt steps
        self.simpb = self.prog(simduration, 'Simulating', ' Step', 0) # progressbar for simulation

        ## Arrays used for storing simulation results
        self.res_n = np.zeros(simduration)
        self.res_e = np.zeros(simduration)
        self.res_d = np.zeros(simduration)
        self.roll = np.zeros(simduration)
        self.pitch = np.zeros(simduration)
        self.yaw = np.zeros(simduration)
        self.ran = False  # becomes True after the first savestate() call

    def savestate(self, state, idd):
        '''
        This function is called every step of the simulation
        and passed the simulation state as well as the current simulation step.

        NOTE: writing the state into the result buffers is currently disabled
        (see the commented-out assignments below), so the buffers stay at zero;
        only the `ran` flag and the progress bar are updated.
        '''
        # self.res_n[idd] = state['position_n'].value
        # self.res_e[idd] = state['position_e'].value
        # self.res_d[idd] = -state['position_d'].value
        # self.roll[idd] = state['roll'].value
        # self.pitch[idd] = state['pitch'].value
        # self.yaw[idd] = state['yaw'].value
        self.ran = True

        self.simpb.update(1) # Update progressbar

    def read_obj(self, filename):
        '''
        from https://gist.github.com/yzhong52/7c3e0b3a201af45f0cd12f10e06b9d95
        load a .obj file into triangles and vertices

        Returns (vertices, triangles) as numpy arrays; triangle entries are
        0-based indices into `vertices`. Only "v" and "f" records are read,
        everything else (comments, normals, blank lines, ...) is skipped.
        '''
        triangles = []
        vertices = []
        with open(filename) as file:
            for line in file:
                # split() without arguments tolerates repeated spaces, tabs and
                # "\r\n" endings, e.g. "v  30.2180 89.5757 -76.8089".
                components = line.split()
                if not components:
                    continue
                if components[0] == "f": # face data
                    # e.g. "f 1/1/1/ 2/2/2 3/3/3 4/4/4 ..." -> keep the vertex index only
                    indices = [int(c.split('/')[0]) - 1 for c in components[1:]]
                    # Fan triangulation: a polygon (v0, v1, ..., vk) becomes
                    # (v0, v1, v2), (v0, v2, v3), ... which keeps the winding order.
                    for i in range(1, len(indices) - 1):
                        triangles.append([indices[0], indices[i], indices[i + 1]])
                elif components[0] == "v": # vertex data
                    vertices.append([float(c) for c in components[1:]])
        return np.array(vertices), np.array(triangles)

    def rotation_matrix(self, axis, theta):
        """
        Return the rotation matrix associated with counterclockwise rotation about
        the given axis by theta radians.
        """
        import math
        import numpy as np
        axis = np.asarray(axis)
        axis = axis / math.sqrt(np.dot(axis, axis))  # normalise to unit length
        a = math.cos(theta / 2.0)
        b, c, d = -axis * math.sin(theta / 2.0)
        aa, bb, cc, dd = a * a, b * b, c * c, d * d
        bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d
        return np.array([[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)],
                         [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)],
                         [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]])

    def plot(self, rotate=180, interval = 10):
        '''
        Main plotting functions accepting the following optional parameters:
            - rotate: azimuth rotation of final 3D plot.
            - interval: delay between animation frames in milliseconds
                        (forwarded to matplotlib's FuncAnimation).

        Builds a 2x2 figure (XY, XZ, 3D view, YZ), loads the drone model from
        "Wing.obj", animates the recorded trajectory and saves the result as
        'vid_<k>.mp4', where <k> is the module-level counter `nummy` (started
        at 0 when it does not exist yet). Returns 0.
        '''
        import matplotlib.pyplot as plt
        # Kept from the original; presumably needed so that older Matplotlib
        # versions register the '3d' projection.
        import mpl_toolkits.mplot3d.axes3d as p3
        import matplotlib.animation as animation
        global nummy

        self.fig = plt.figure(figsize=(10,10))
        self.fig.suptitle('')

        data = [self.res_n, self.res_e, self.res_d]

        def padded_limits(values):
            # Axis range covering `values` with a margin of 2 on both sides.
            return [np.min(values)-2, np.max(values)+2]

        def add_projection(position, x_values, x_label, y_values, y_label):
            # One 2D projection panel at the given slot of the 2x2 grid.
            axes = self.fig.add_subplot(2, 2, position)
            axes.set_xlim(padded_limits(x_values))
            axes.set_xlabel(x_label)
            axes.set_ylim(padded_limits(y_values))
            axes.set_ylabel(y_label)
            return axes

        # top left plot
        ax1 = add_projection(1, self.res_n, 'X', self.res_e, 'Y')

        # top right plot
        ax2 = add_projection(2, self.res_n, 'X', self.res_d, 'Z')

        # bottom left plot; stored on self because update_lines() redraws the drone on it
        self.ax = self.fig.add_subplot(2, 2, 3, projection='3d')
        cube_limits = padded_limits(data)  # same range on all three axes
        self.ax.set_xlim3d(cube_limits)
        self.ax.set_xlabel('X')
        self.ax.set_ylim3d(cube_limits)
        self.ax.set_ylabel('Y')
        self.ax.set_zlim3d(cube_limits)
        self.ax.set_zlabel('Z')

        # bottom right plot
        ax3 = add_projection(4, self.res_e, 'Y', self.res_d, 'Z')

        self.ax.set_title('')
        ax1.set_title('') # XY plottitle
        ax2.set_title('') # XZ plottitle
        ax3.set_title('') # YZ plottitle

        # load drone 3D model
        vertices, self.triangles = self.read_obj("Wing.obj")

        # scaling drone 3D model (this is currently at an arbitrary scale)
        self.drone_x = vertices[:,0]*0.15
        self.drone_y = vertices[:,1]*0.15
        self.drone_z = vertices[:,2]*0.15

        # Rotate Drone into correct initial orientation
        done = np.dot(self.rotation_matrix([0,0,1], np.pi/2), [self.drone_x,self.drone_y,self.drone_z])
        done = np.dot(self.rotation_matrix([1,0,0], np.pi/2), done)
        [self.drone_x,self.drone_y,self.drone_z] = np.dot(self.rotation_matrix([0,1,0], np.pi), done)

        self.drone=[self.drone_x,self.drone_y,self.drone_z]

        lines=[0,0,0,0]

        lines[0], = self.ax.plot(data[0][0:1], data[1][0:1], data[2][0:1])
        lines[1], = ax1.plot([], [], lw=2)
        lines[2], = ax2.plot([], [], lw=2)
        lines[3], = ax3.plot([], [], lw=2)

        # Never request more frames than there are recorded steps:
        # update_lines() indexes the buffers with the frame number.
        n_frames = min(500, self.simduration)

        # Creating the Animation object
        self.pb = self.prog(n_frames, 'Drawing', ' Frame',0)
        anim = animation.FuncAnimation(self.fig, self.update_lines, n_frames, fargs=(data, lines, rotate),
                                           interval=interval, blit=False)

        # To embed in the notebook instead of saving:
        #   from IPython.display import HTML; HTML(anim.to_html5_video())

        # `nummy` numbers the saved videos; start at 0 if nobody defined it yet.
        try:
            video_index = nummy
        except NameError:
            video_index = 0
        anim.save('vid_'+str(video_index)+'.mp4')
        nummy = video_index + 1
        return 0

    # Calculates Rotation Matrix given euler angles.
    def eulerAnglesToRotationMatrix(self, theta) :
        '''
        Build R = R_z(theta[2]) . R_y(theta[1]) . R_x(theta[0]) from three
        angles given in radians.
        '''
        import math

        R_x = np.array([[1,         0,                  0                   ],
                        [0,         math.cos(theta[0]), -math.sin(theta[0]) ],
                        [0,         math.sin(theta[0]), math.cos(theta[0])  ]
                        ])

        R_y = np.array([[math.cos(theta[1]),    0,      math.sin(theta[1])  ],
                        [0,                     1,      0                   ],
                        [-math.sin(theta[1]),   0,      math.cos(theta[1])  ]
                        ])

        R_z = np.array([[math.cos(theta[2]),    -math.sin(theta[2]),    0],
                        [math.sin(theta[2]),    math.cos(theta[2]),     0],
                        [0,                     0,                      1]
                        ])

        R = np.dot(R_z, np.dot( R_y, R_x ))

        return R

    def update_lines(self, num, dataLines, lines, rotate):
        '''
        Animation callback: draw everything up to frame `num`.

        dataLines: [n, e, d] position buffers.
        lines:     the 3D trajectory line followed by the XY, XZ and YZ 2D lines.
        rotate:    total azimuth sweep (degrees) of the 3D camera over the run.
        Returns `lines`.
        '''
        self.pb.update(1)

        # Buffer index pairs for the 2D panels, in the order the 2D lines appear.
        projections = [(0, 1), (0, 2), (1, 2)]
        cnt = 0

        for line in lines:
            # The 3D line is recognised by its extra setter; plain 2D lines lack it.
            if not hasattr(line, 'set_3d_properties'):
                first, second = projections[min(cnt, 2)]
                line.set_data(dataLines[first][:num], dataLines[second][:num])
                cnt = cnt + 1
            else:
                # Drop the drone surface drawn for the previous frame. Artist.remove()
                # is used because Axes.collections can no longer be modified in place
                # on recent Matplotlib versions.
                if len(self.ax.collections):
                    self.ax.collections[-1].remove()

                # NOTE: there is no .set_data() for 3 dim data...
                line.set_data(dataLines[0][:num],dataLines[1][:num])

                line.set_3d_properties(dataLines[2][:num])

                ### Here is probably most optimization potential!!##########
                ### Are the signs correct???
                ### are these euler angles or not???

                #yaw_rot = self.rotation_matrix([0,0,1], self.yaw[num])
                #roll_rot = self.rotation_matrix([0,1,0], self.roll[num])
                #pitch_rot = self.rotation_matrix([1,0,0], self.pitch[num])

                #drone = np.dot(yaw_rot, self.drone)
                #drone = np.dot(np.dot(roll_rot,yaw_rot), drone)
                #drone = np.dot(np.dot(pitch_rot,np.dot(roll_rot,yaw_rot)), drone)

                drone = np.dot(self.eulerAnglesToRotationMatrix([self.roll[num],self.yaw[num],self.pitch[num]]), self.drone)

                ##############################################################
                # ax.plot_trisurf(x, z, triangles, y, shade=True, color='white')
                # Model rows 1 and 2 are swapped when plotting (model y goes to plot z).
                self.ax.plot_trisurf(drone[0]+dataLines[0][num], drone[2]+dataLines[1][num], self.triangles, drone[1]+dataLines[2][num], shade=True, color='red')
                self.ax.view_init(elev=15., azim=45+rotate*num/self.simduration) # 15, 45

        return lines

In [3]:
class FooEnv(gym.Env):
    """
    Gym environment wrapping a PyFly fixed-wing simulation.

    Action:      3-vector bounded by [-1, -1, 0] .. [1, 1, 1], passed to PyFly.step().
    Observation: array of shape (1, 7) holding
                 [roll, pitch, Va, omega_p, omega_q, omega_r, position_d].
    Reward:      20 * (previous position_d - current position_d) per step
                 (presumably positive when the aircraft climbs, position_d
                 being a "down" coordinate -- TODO confirm against PyFly).
    An episode ends when PyFly.step() reports failure or after
    MAX_EPISODE_STEPS steps.
    """
    metadata = {'render.modes': ['human']}

    # PyFly configuration and airframe parameter files.
    # NOTE(review): machine-specific absolute paths; adjust per installation.
    PYFLY_CONFIG_PATH = "/home/user/anaconda3/lib/python3.8/site-packages/pyfly/pyfly_config.json"
    PYFLY_PARAM_PATH = "/home/user/anaconda3/lib/python3.8/site-packages/pyfly/x8_param.mat"

    # Step budget of one episode; also the size of the per-episode simrecorder.
    MAX_EPISODE_STEPS = 500

    _goal = np.inf

    def __init__(self, task={}, n_tasks=2, **kwargs):
        """
        task, n_tasks, kwargs: accepted for compatibility with the caller and
        only printed; they do not influence the environment.
        """
        super(FooEnv, self).__init__()

        self.reward_range = (0, 200*15)
        print('you get this shit from oyster:', task, n_tasks, kwargs )

        self.action_space = spaces.Box(
          low=np.array([-1, -1, 0]), high=np.array([1,1,1]), dtype=np.float16)

        self.observation_space = spaces.Box(
          low=np.array([-999,-999,-999,-999,-999,-999,-999 ]), high=np.array([999,999,999,999,999,999,999]), dtype=np.float16)

        self._start_fresh_sim()

        #self.rec = simrecorder(500)
        self.i = 0            # step counter within the current episode
        self.pef = 0          # number of reset() calls so far
        self.reward_sum = 0   # accumulated reward of the current episode
        print(self.observation_space)

    def _make_sim(self):
        """Construct a new PyFly simulator from the configured files."""
        return PyFly(self.PYFLY_CONFIG_PATH, self.PYFLY_PARAM_PATH)

    def _start_fresh_sim(self):
        """Replace self.sim by a seeded, reset simulator and record the start height."""
        self.sim = self._make_sim()
        self.sim.seed(0)
        self.sim.reset(state={"roll": 0, "pitch": 0, "Wind": 0})
        self.sim.turbulence = False
        self.sim.turbulence_intensity = 'none'
        self.start_height = self.sim.state["position_d"].value
        self.hgtm1 = self.sim.state["position_d"].value  # position_d of the previous step

    def reset_task(self, idx):
        """
        Replace the simulator by a newly constructed one; `idx` is ignored.

        NOTE(review): unlike reset(), the new simulator is neither seeded nor
        reset here -- confirm this is intended.
        """
        print('###################################################')
        self.sim = self._make_sim()
        #self.sim.seed(np.random.randint(100000))
        #self.reset()

    def reset(self):
        """Start a new episode and return the initial observation."""
        print('--reset--')
        #if self.rec.ran:
        #    self.sim.render(block=True)
        #    if self.pef%3 == 0:
        #        self.sim.render(block=True)
        #        self.rec.plot()
        self.pef = self.pef+1
        self.reward_sum = 0

        self._start_fresh_sim()

        self.i = 0
        # The recorder is only created here, so step() requires a prior reset().
        self.rec = simrecorder(self.MAX_EPISODE_STEPS)

        return self._get_obs()

    def _get_obs(self):
        """Return the current observation as an array of shape (1, 7)."""
        phi = self.sim.state["roll"].value
        theta = self.sim.state["pitch"].value
        Va = self.sim.state["Va"].value
        omega_p = self.sim.state["omega_p"].value
        omega_q = self.sim.state["omega_q"].value
        omega_r = self.sim.state["omega_r"].value
        h = self.sim.state["position_d"].value

        return np.array([[phi,
                           theta,
                           Va,
                           omega_p,
                           omega_q,
                           omega_r,
                           h]])

    def step(self, a):
        """
        Advance the simulation by one step with action `a`.

        `a` may have shape (3,) or (1, 3). Returns (obs, reward, done, info)
        where info holds 'reward_forward' and 'reward_ctrl'. When the episode
        ends, a summary is printed and the (blocking) PyFly render is shown.
        """
        if a.shape == (1, 3):
            a = a[0]
        success, step_info = self.sim.step(a)

        self.rec.savestate(self.sim.state, self.i)
        self.i += 1

        #forward_reward = (self.start_height - self.sim.state["position_d"].value)**3
        forward_reward = 20* (self.hgtm1 - self.sim.state["position_d"].value)

        ctrl_cost = 0#0.5 * np.square(a).sum()
        survive_reward = 0

        done = ( not success ) or self.i >= self.MAX_EPISODE_STEPS
        reward = forward_reward - ctrl_cost + survive_reward

        obs = self._get_obs()

        self.reward_sum += reward

        # self.i is at least 1 here, so "i % MAX_EPISODE_STEPS == 0" can only hold
        # when i >= MAX_EPISODE_STEPS, which already makes `done` true.
        if done:
            print('total height gained: ', self.start_height - self.sim.state["position_d"].value , 'total reward:', self.reward_sum)
            self.sim.render(block=True)
        self.hgtm1 = self.sim.state["position_d"].value
        return obs, reward, done, dict(
            reward_forward=forward_reward,
            reward_ctrl=-ctrl_cost)

    def close(self):
        """
        NOTE(review): closing re-initialises the environment via reset()
        instead of releasing anything -- confirm this is intended.
        """
        self.reset()


In [4]:
import numpy as np
from gym.envs.mujoco import HalfCheetahEnv as HalfCheetahEnv_

class HalfCheetahEnv(HalfCheetahEnv_):
    """Half-cheetah variant with a custom observation, tracking camera and render()."""

    def _get_obs(self):
        """
        Observation = qpos without its first entry, followed by qvel and the
        torso centre of mass, as one flat float32 vector.
        """
        parts = [
            self.sim.data.qpos.flat[1:],
            self.sim.data.qvel.flat,
            self.get_body_com("torso").flat,
        ]
        observation = np.concatenate(parts)
        return observation.astype(np.float32).flatten()

    def viewer_setup(self):
        """Attach the viewer to the model camera named 'track' and hide the overlay."""
        track_camera = self.model.camera_name2id('track')
        self.viewer.cam.type = 2
        self.viewer.cam.fixedcamid = track_camera
        self.viewer.cam.distance = 0.35 * self.model.stat.extent
        # Hide the overlay
        self.viewer._hide_overlay = True

    def render(self, mode='human'):
        """
        'human': render to the viewer and return None.
        'rgb_array': render, then return a 500x500 pixel read-out of the viewer.
        Any other mode does nothing and returns None.
        """
        if mode not in ('rgb_array', 'human'):
            return None
        self._get_viewer().render()
        if mode == 'human':
            return None
        # window size used for old mujoco-py:
        side = 500
        return self._get_viewer().read_pixels(side, side, depth=False)
            
import numpy as np



class HalfCheetahVelEnv(HalfCheetahEnv):
    """Half-cheetah environment with target velocity, as described in [1]. The
    code is adapted from
    https://github.com/cbfinn/maml_rl/blob/9c8e2ebd741cb0c7b8bf2d040c4caeeb8e06cc95/rllab/envs/mujoco/half_cheetah_env_rand.py
    The half-cheetah follows the dynamics from MuJoCo [2], and receives at each
    time step a reward composed of a control cost and a penalty equal to the
    difference between its current velocity and the target velocity. The tasks
    are generated by sampling the target velocities from the uniform
    distribution on [0, 3] (see sample_tasks).

    Each time an episode ends, the task at the head of `self.tasks` is
    discarded and the next one becomes the goal.

    [1] Chelsea Finn, Pieter Abbeel, Sergey Levine, "Model-Agnostic
        Meta-Learning for Fast Adaptation of Deep Networks", 2017
        (https://arxiv.org/abs/1703.03400)
    [2] Emanuel Todorov, Tom Erez, Yuval Tassa, "MuJoCo: A physics engine for
        model-based control", 2012
        (https://homes.cs.washington.edu/~todorov/papers/TodorovIROS12.pdf)
    """

    # The episode is flagged done on the step at which the counter has already
    # reached this value, i.e. after the counter was incremented this many times.
    EPISODE_STEP_LIMIT = 200

    def __init__(self, task=None, n_tasks=30000, randomize_tasks=True):
        """
        task:            dict reported back as info['task'] (a fresh empty dict
                         when omitted, so instances never share one).
        n_tasks:         number of goal velocities sampled up front.
        randomize_tasks: accepted but not used by this class.
        """
        self._task = {} if task is None else task
        self.tasks = self.sample_tasks(n_tasks)
        self._goal_vel = self.tasks[0].get('velocity', 0.0)
        self._goal = self._goal_vel
        self.i = 0
        # Base constructor runs last, after the attributes step() reads exist
        # (presumably because the base class may call step() -- TODO confirm).
        super(HalfCheetahVelEnv, self).__init__()

    def step(self, action):
        """
        Simulate one step and return (observation, reward, done, infos).

        reward = -|forward_velocity - goal_velocity| - 0.05 * sum(action**2).
        When the episode ends, the finished goal velocity is printed and the
        environment moves on to the next sampled task.
        """
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        xposafter = self.sim.data.qpos[0]

        forward_vel = (xposafter - xposbefore) / self.dt
        forward_reward = -1.0 * abs(forward_vel - self._goal_vel)
        ctrl_cost = 0.5 * 1e-1 * np.sum(np.square(action))

        observation = self._get_obs()
        reward = forward_reward - ctrl_cost
        if self.i >= self.EPISODE_STEP_LIMIT:
            done = True
            self.i = 0
            print(self._goal_vel)
            # Move on to the next task. The last remaining task is kept and
            # reused, so running out of sampled tasks no longer raises IndexError.
            if len(self.tasks) > 1:
                del self.tasks[0]
            self._goal_vel = self.tasks[0].get('velocity', 0.0)
            self._goal = self._goal_vel
        else:
            done = False
            self.i += 1
        infos = dict(reward_forward=forward_reward,
            reward_ctrl=-ctrl_cost, task=self._task)
        return (observation, reward, done, infos)

    def sample_tasks(self, num_tasks):
        """
        Return `num_tasks` task dicts {'velocity': v} with v drawn uniformly
        from [0, 3).

        NOTE: this reseeds numpy's global random generator with 666, so the
        sampled velocities are identical on every call.
        """
        np.random.seed(666)
        print('goal sampled')
        velocities = np.random.uniform(0.0, 3.0, size=(num_tasks,))
        tasks = [{'velocity': velocity} for velocity in velocities]
        return tasks

    def get_all_task_idx(self):
        """Indices of the tasks that are still queued."""
        return range(len(self.tasks))

    def reset_task(self, idx):
        """Make task `idx` the current goal and reset the environment."""
        self._task = self.tasks[idx]
        self._goal_vel = self._task['velocity']
        self._goal = self._goal_vel
        self.reset()

In [5]:
# Scratch calculation: 1e6 / 20000 = 50 (presumably the number of 20000-step
# learn() calls needed to reach 1e6 timesteps -- TODO confirm).
(1e6)/20000


50.0

In [None]:
# Train mSAC on the velocity-goal half-cheetah and track evaluation results.
# Other environments tried in this slot: gym.make('BipedalWalker-v3'), FooEnv()
env = HalfCheetahVelEnv(n_tasks=5000000)

policy_kwargs = dict(
    net_arch=[300, 300, 300],
    latent_dim=5,
    hidden_sizes=[200, 200, 200],
)
# learning_rate=0.0006 was tried as an extra argument as well
meta_model = mSAC('MlpPolicy', env, verbose=1, policy_kwargs=policy_kwargs)

# Evaluation history: index 0 is the untrained model, then one entry per epoch.
meta_reward, meta_std = [], []

print('-Start-')
n_eval = 30

# Baseline evaluation before any training
(meta_model_mean_reward_before,
 meta_model_std_reward_before) = evaluate_meta_policy(meta_model, env, n_eval_episodes=n_eval)
meta_reward.append(meta_model_mean_reward_before)
meta_std.append(meta_model_std_reward_before)

print('##################################Start Learning##################################')
for i in range(100):
    # One epoch = 20000 training timesteps followed by an evaluation
    # (learn() also accepts eval_freq=100, n_eval_episodes=5 -- not used here).
    meta_model.learn(total_timesteps=20000)
    (meta_model_mean_reward,
     meta_model_std_reward) = evaluate_meta_policy(meta_model, env, n_eval_episodes=n_eval)

    meta_reward.append(meta_model_mean_reward)
    meta_std.append(meta_model_std_reward)

    print('epoch:', i)
    print('meta_reward = ', meta_reward)
    print('meta_std = ', meta_std)

env.close()

goal sampled
Using cpu device
Wrapping the env in a DummyVecEnv.
critic with  31
critic with  31
critic with  31
critic with  31
-Start-
2.101311365573504
2.5325599285513034
2.0295430078128764
2.1835741717442243
2.8543738723390177
0.0381095911043029
1.2407630963595704
0.1464383814000001
0.29978568396363425
1.5241989173010195
0.6007426180034324
2.232462506442291
0.5786760091217512
2.102534256497944
0.8796843176077019
2.323438362805766
0.015326516291399583
0.3385729608951239
0.33286101596233997
0.7430046860850907
0.06970889771671174
2.1819634627055073
1.0201048258638807
0.5925094691789949
2.727538778518763
2.9350409550453858
1.5984076323182648
0.7773955481068675
1.7514378564759157
0.977071958800826
##################################Start Learning##################################
2.6666979413289273
1.8792135802692749
2.4566210717334553
1.6420362611188852
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             

2.90756750530061
2.2154133647125143
2.158651823349113
2.6791301595028116
------------------------------------
| time/              |             |
|    episodes        | 48          |
|    fps             | 19          |
|    time_elapsed    | 485         |
|    total timesteps | 9648        |
| train/             |             |
|    KL_loss         | 0.04385365  |
|    actor_loss      | 28.4        |
|    avg. z          | 0.013132486 |
|    avg. z var      | 0.87740934  |
|    critic_loss     | 6.18        |
|    ent_coef        | 0.0585      |
|    ent_coef_loss   | -21.8       |
|    learning_rate   | 0.0003      |
|    n_updates       | 9647        |
------------------------------------
2.8880240437697964
0.591150683530195
2.1437698926584905
0.4857718178411339
-------------------------------------
| time/              |              |
|    episodes        | 52           |
|    fps             | 19           |
|    time_elapsed    | 525          |
|    total timesteps | 10452     

0.269474830630996
2.038210723107573
1.9677849667655831
0.011833197811942475
-------------------------------------
| time/              |              |
|    episodes        | 96           |
|    fps             | 20           |
|    time_elapsed    | 960          |
|    total timesteps | 19296        |
| train/             |              |
|    KL_loss         | 0.042280518  |
|    actor_loss      | 98.4         |
|    avg. z          | -0.040304538 |
|    avg. z var      | 0.84386444   |
|    critic_loss     | 16.5         |
|    ent_coef        | 0.00353      |
|    ent_coef_loss   | -37          |
|    learning_rate   | 0.0003       |
|    n_updates       | 19295        |
-------------------------------------
0.19962401770421812
0.0033632737069495544
1.9982514507114943
1.1569534792921967
0.28217482121291393
1.3757027191077593
1.9330251852400004
1.784993216877644
2.3718092214736695
2.3999072075920056
2.0390937568347427
1.3162555564946936
0.7870766782725064
0.7095656504682741
2.517006

1.1319872658387045
1.1143883410657538
2.7979019349382748
2.6361280075847464
-------------------------------------
| time/              |              |
|    episodes        | 44           |
|    fps             | 20           |
|    time_elapsed    | 438          |
|    total timesteps | 8844         |
| train/             |              |
|    KL_loss         | 0.039955907  |
|    actor_loss      | 144          |
|    avg. z          | -0.019022232 |
|    avg. z var      | 0.8387195    |
|    critic_loss     | 94.7         |
|    ent_coef        | 0.000342     |
|    ent_coef_loss   | -5.2         |
|    learning_rate   | 0.0003       |
|    n_updates       | 28843        |
-------------------------------------
1.9062086719690092
0.2193354659362543
2.541931087800947
2.5518968644033015
------------------------------------
| time/              |             |
|    episodes        | 48          |
|    fps             | 20          |
|    time_elapsed    | 480         |
|    total timeste

1.6923060982589693
2.7629081443327848
2.169533340464194
2.860731415629016
-------------------------------------
| time/              |              |
|    episodes        | 92           |
|    fps             | 19           |
|    time_elapsed    | 925          |
|    total timesteps | 18492        |
| train/             |              |
|    KL_loss         | 0.051447667  |
|    actor_loss      | 162          |
|    avg. z          | -0.019664612 |
|    avg. z var      | 0.81797063   |
|    critic_loss     | 26           |
|    ent_coef        | 3.85e-05     |
|    ent_coef_loss   | 5.59         |
|    learning_rate   | 0.0003       |
|    n_updates       | 38491        |
-------------------------------------
0.7679958041416519
1.931167939181293
1.0815860907931105
1.2629416688910837
--------------------------------------
| time/              |               |
|    episodes        | 96            |
|    fps             | 19            |
|    time_elapsed    | 967           |
|    total

0.9783443946105542
0.14819131371612682
2.5789256412592847
2.37616428801651
-------------------------------------
| time/              |              |
|    episodes        | 40           |
|    fps             | 20           |
|    time_elapsed    | 394          |
|    total timesteps | 8040         |
| train/             |              |
|    KL_loss         | 0.019287549  |
|    actor_loss      | 163          |
|    avg. z          | -0.008930844 |
|    avg. z var      | 0.9118918    |
|    critic_loss     | 13.4         |
|    ent_coef        | 0.000524     |
|    ent_coef_loss   | 12.9         |
|    learning_rate   | 0.0003       |
|    n_updates       | 48039        |
-------------------------------------
1.5568427059583314
0.6583127883511608
1.942839406961733
2.4325003907130585
-------------------------------------
| time/              |              |
|    episodes        | 44           |
|    fps             | 20           |
|    time_elapsed    | 435          |
|    total tim

0.8870688122082827
0.47622207767863245
2.0595583121229293
2.562817378677796
------------------------------------
| time/              |             |
|    episodes        | 88          |
|    fps             | 19          |
|    time_elapsed    | 893         |
|    total timesteps | 17688       |
| train/             |             |
|    KL_loss         | 0.02237736  |
|    actor_loss      | 157         |
|    avg. z          | -0.02683878 |
|    avg. z var      | 0.9224292   |
|    critic_loss     | 18.4        |
|    ent_coef        | 0.00954     |
|    ent_coef_loss   | 9.91        |
|    learning_rate   | 0.0003      |
|    n_updates       | 57687       |
------------------------------------
2.122276421598051
1.689059230828279
1.0096032194152411
2.733332462471708
--------------------------------------
| time/              |               |
|    episodes        | 92            |
|    fps             | 19            |
|    time_elapsed    | 937           |
|    total timesteps | 1849

2.3659216870434125
2.6050907529870795
0.5128997494200257
2.0882291077611987
--------------------------------------
| time/              |               |
|    episodes        | 36            |
|    fps             | 18            |
|    time_elapsed    | 381           |
|    total timesteps | 7236          |
| train/             |               |
|    KL_loss         | 0.020293713   |
|    actor_loss      | 155           |
|    avg. z          | -0.0052250554 |
|    avg. z var      | 0.88397205    |
|    critic_loss     | 41.1          |
|    ent_coef        | 0.0168        |
|    ent_coef_loss   | 1.89          |
|    learning_rate   | 0.0003        |
|    n_updates       | 67235         |
--------------------------------------
0.8888443939277852
0.9421428401708976
2.409867962112238
0.8753929182003255
-------------------------------------
| time/              |              |
|    episodes        | 40           |
|    fps             | 19           |
|    time_elapsed    | 422        

1.7337186335975658
2.1575707844298693
2.4068339790213793
0.019020776439600318
-------------------------------------
| time/              |              |
|    episodes        | 84           |
|    fps             | 18           |
|    time_elapsed    | 892          |
|    total timesteps | 16884        |
| train/             |              |
|    KL_loss         | 0.015548557  |
|    actor_loss      | 158          |
|    avg. z          | 0.0016868521 |
|    avg. z var      | 0.8984855    |
|    critic_loss     | 22.4         |
|    ent_coef        | 0.0209       |
|    ent_coef_loss   | 1.3          |
|    learning_rate   | 0.0003       |
|    n_updates       | 76883        |
-------------------------------------
2.7316661504019315
0.7109470601515316
0.2893955609188368
1.039309830639183
------------------------------------
| time/              |             |
|    episodes        | 88          |
|    fps             | 18          |
|    time_elapsed    | 937         |
|    total times

2.6332137109020075
0.5760724803545724
1.5689926333133577
1.8645176774673704
------------------------------------
| time/              |             |
|    episodes        | 32          |
|    fps             | 18          |
|    time_elapsed    | 341         |
|    total timesteps | 6432        |
| train/             |             |
|    KL_loss         | 0.041626096 |
|    actor_loss      | 160         |
|    avg. z          | 0.03280119  |
|    avg. z var      | 0.837756    |
|    critic_loss     | 57.5        |
|    ent_coef        | 0.0285      |
|    ent_coef_loss   | -0.648      |
|    learning_rate   | 0.0003      |
|    n_updates       | 86431       |
------------------------------------
0.18238525824611596
0.056097790555659
0.894135007014854
1.514202565650733
-------------------------------------
| time/              |              |
|    episodes        | 36           |
|    fps             | 18           |
|    time_elapsed    | 387          |
|    total timesteps | 7236    

1.3562408354271895
1.2863270662164559
1.825502232265344
2.208278950605087
-------------------------------------
| time/              |              |
|    episodes        | 80           |
|    fps             | 18           |
|    time_elapsed    | 865          |
|    total timesteps | 16080        |
| train/             |              |
|    KL_loss         | 0.01671524   |
|    actor_loss      | 156          |
|    avg. z          | -0.023550835 |
|    avg. z var      | 0.89452726   |
|    critic_loss     | 48.6         |
|    ent_coef        | 0.0314       |
|    ent_coef_loss   | 1.14         |
|    learning_rate   | 0.0003       |
|    n_updates       | 96079        |
-------------------------------------
1.0576087201463014
1.0680418554075652
1.7741011098048645
2.469372743891114
------------------------------------
| time/              |             |
|    episodes        | 84          |
|    fps             | 18          |
|    time_elapsed    | 907         |
|    total timesteps

0.4274019187015978
2.510511772070913
1.1795384620981704
2.4318368771498524
------------------------------------
| time/              |             |
|    episodes        | 28          |
|    fps             | 18          |
|    time_elapsed    | 303         |
|    total timesteps | 5628        |
| train/             |             |
|    KL_loss         | 0.035630032 |
|    actor_loss      | 153         |
|    avg. z          | 0.019945221 |
|    avg. z var      | 0.8626601   |
|    critic_loss     | 68.1        |
|    ent_coef        | 0.0359      |
|    ent_coef_loss   | 1.89        |
|    learning_rate   | 0.0003      |
|    n_updates       | 105627      |
------------------------------------
1.8489101393442615
1.9172164744332576
0.3675922138010669
1.066164992272277
------------------------------------
| time/              |             |
|    episodes        | 32          |
|    fps             | 18          |
|    time_elapsed    | 348         |
|    total timesteps | 6432        |

1.7965397167898738
2.1208231891772575
0.7326326512093558
1.5384123784336396
------------------------------------
| time/              |             |
|    episodes        | 76          |
|    fps             | 18          |
|    time_elapsed    | 843         |
|    total timesteps | 15276       |
| train/             |             |
|    KL_loss         | 0.03606496  |
|    actor_loss      | 147         |
|    avg. z          | -0.03903104 |
|    avg. z var      | 0.85987407  |
|    critic_loss     | 56.7        |
|    ent_coef        | 0.0378      |
|    ent_coef_loss   | -1.63       |
|    learning_rate   | 0.0003      |
|    n_updates       | 115275      |
------------------------------------
0.3776547050201332
1.4111432037055813
1.1849849142316193
0.498859164437812
------------------------------------
| time/              |             |
|    episodes        | 80          |
|    fps             | 18          |
|    time_elapsed    | 886         |
|    total timesteps | 16080       

2.20490144140891
2.9154294438611377
2.885353173115911
0.7124907066826475
------------------------------------
| time/              |             |
|    episodes        | 24          |
|    fps             | 18          |
|    time_elapsed    | 260         |
|    total timesteps | 4824        |
| train/             |             |
|    KL_loss         | 0.031510822 |
|    actor_loss      | 145         |
|    avg. z          | -0.01210363 |
|    avg. z var      | 0.86668193  |
|    critic_loss     | 47          |
|    ent_coef        | 0.0346      |
|    ent_coef_loss   | -2.88       |
|    learning_rate   | 0.0003      |
|    n_updates       | 124823      |
------------------------------------
2.860329874340898
2.3764571517851603
1.0256320013057465
2.505357016880623
--------------------------------------
| time/              |               |
|    episodes        | 28            |
|    fps             | 18            |
|    time_elapsed    | 306           |
|    total timesteps | 5628  

0.9069433777749284
0.578059715588895
0.5458020271401579
0.4867786531795414
-------------------------------------
| time/              |              |
|    episodes        | 72           |
|    fps             | 18           |
|    time_elapsed    | 787          |
|    total timesteps | 14472        |
| train/             |              |
|    KL_loss         | 0.009412669  |
|    actor_loss      | 148          |
|    avg. z          | -0.009130882 |
|    avg. z var      | 0.9223677    |
|    critic_loss     | 79.2         |
|    ent_coef        | 0.0445       |
|    ent_coef_loss   | -0.947       |
|    learning_rate   | 0.0003       |
|    n_updates       | 134471       |
-------------------------------------
0.13516997180195633
0.6110759742745868
0.34715900720430615
1.9465051827662467
-------------------------------------
| time/              |              |
|    episodes        | 76           |
|    fps             | 18           |
|    time_elapsed    | 830          |
|    total 

2.5490643991167126
2.2174594817551845
2.4714928224563106
2.7195278494790402
-------------------------------------
| time/              |              |
|    episodes        | 20           |
|    fps             | 17           |
|    time_elapsed    | 224          |
|    total timesteps | 4020         |
| train/             |              |
|    KL_loss         | 0.013258025  |
|    actor_loss      | 149          |
|    avg. z          | -0.040490318 |
|    avg. z var      | 0.9287081    |
|    critic_loss     | 122          |
|    ent_coef        | 0.0394       |
|    ent_coef_loss   | 0.836        |
|    learning_rate   | 0.0003       |
|    n_updates       | 144019       |
-------------------------------------
1.951028508909772
0.18550361049501984
1.4079234887816485
1.1935289261421476
------------------------------------
| time/              |             |
|    episodes        | 24          |
|    fps             | 17          |
|    time_elapsed    | 271         |
|    total timest

2.632496493969541
2.2705543676488524
0.8324186013092658
1.9363101023737115
-------------------------------------
| time/              |              |
|    episodes        | 68           |
|    fps             | 17           |
|    time_elapsed    | 759          |
|    total timesteps | 13668        |
| train/             |              |
|    KL_loss         | 0.05961559   |
|    actor_loss      | 150          |
|    avg. z          | -0.009252387 |
|    avg. z var      | 0.82072866   |
|    critic_loss     | 8.89         |
|    ent_coef        | 0.0434       |
|    ent_coef_loss   | -1.3         |
|    learning_rate   | 0.0003       |
|    n_updates       | 153667       |
-------------------------------------
1.6938320633590753
2.2605964500363376
0.02955408832588391
1.987890398196115
------------------------------------
| time/              |             |
|    episodes        | 72          |
|    fps             | 17          |
|    time_elapsed    | 806         |
|    total timeste

2.2876960733555767
1.2487555115820457
1.9541835079806285
-------------------------------------
| time/              |              |
|    episodes        | 12           |
|    fps             | 18           |
|    time_elapsed    | 132          |
|    total timesteps | 2412         |
| train/             |              |
|    KL_loss         | 0.017157897  |
|    actor_loss      | 150          |
|    avg. z          | -0.015223871 |
|    avg. z var      | 0.89133584   |
|    critic_loss     | 57.4         |
|    ent_coef        | 0.0435       |
|    ent_coef_loss   | -4.34        |
|    learning_rate   | 0.0003       |
|    n_updates       | 162411       |
-------------------------------------
0.7609716974204864
1.8307170399024608
2.1740771945333646
2.020568331905044
------------------------------------
| time/              |             |
|    episodes        | 16          |
|    fps             | 18          |
|    time_elapsed    | 175         |
|    total timesteps | 3216        |


1.4664221168125562
1.0343800312886327
2.2410757495125306
0.7697743415795295
-------------------------------------
| time/              |              |
|    episodes        | 60           |
|    fps             | 18           |
|    time_elapsed    | 669          |
|    total timesteps | 12060        |
| train/             |              |
|    KL_loss         | 0.02944244   |
|    actor_loss      | 147          |
|    avg. z          | -0.010968702 |
|    avg. z var      | 0.88546896   |
|    critic_loss     | 58.2         |
|    ent_coef        | 0.0455       |
|    ent_coef_loss   | 1.01         |
|    learning_rate   | 0.0003       |
|    n_updates       | 172059       |
-------------------------------------
2.5931885260980287
0.00037995079279384836
2.8959799230566574
2.5133819359452816
--------------------------------------
| time/              |               |
|    episodes        | 64            |
|    fps             | 18            |
|    time_elapsed    | 713           |
|  

0.6882795156122168
0.19605228596409374
2.9881871086584706
0.2919122688891981
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 18           |
|    time_elapsed    | 44           |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 0.035433438  |
|    actor_loss      | 150          |
|    avg. z          | -0.026992574 |
|    avg. z var      | 0.88519555   |
|    critic_loss     | 17.3         |
|    ent_coef        | 0.051        |
|    ent_coef_loss   | -3.29        |
|    learning_rate   | 0.0003       |
|    n_updates       | 180803       |
-------------------------------------
2.596587470817174
0.03823531628548882
1.6507028022603167
1.584751768876953
-------------------------------------
| time/              |              |
|    episodes        | 8            |
|    fps             | 17           |
|    time_elapsed    | 91           |
|    total t

0.3744188648716845
2.9670923159189684
2.2513313721513644
0.949539973434288
-------------------------------------
| time/              |              |
|    episodes        | 52           |
|    fps             | 17           |
|    time_elapsed    | 588          |
|    total timesteps | 10452        |
| train/             |              |
|    KL_loss         | 0.01735261   |
|    actor_loss      | 147          |
|    avg. z          | -0.009468749 |
|    avg. z var      | 0.88982594   |
|    critic_loss     | 116          |
|    ent_coef        | 0.0504       |
|    ent_coef_loss   | 2.78         |
|    learning_rate   | 0.0003       |
|    n_updates       | 190451       |
-------------------------------------
2.682763524234081
1.4367316117329731
1.6602863551510803
2.779932120256673
------------------------------------
| time/              |             |
|    episodes        | 56          |
|    fps             | 17          |
|    time_elapsed    | 631         |
|    total timesteps

2.815943904069689
1.2400577220564615
1.4147208568442848
0.07163715143306604
0.4068592453531289
0.5743977179569828
2.456111950655078
0.25968666380171435
2.8120378666239745
0.07301881983013292
0.6058239917126318
0.7745551245605455
1.8403789776755728
1.4146608097597277
0.07425980522325515
2.9387342503639737
0.02214760376053504
1.1734346419139552
0.15076050716217104
2.4790056790322574
0.6701081632193686
0.6522361831227498
2.6536408787038606
2.9881012451269373
0.8058009391105635
0.003519607185571094
1.9455822253102926
2.3817625802261944
2.8147874755518023
1.1410872069509046
1.6532633861756834
0.22482403253447802
1.3172454463163377
epoch: 9
meta_reward =  [-318.68103072624336, -326.54882258410385, -304.515222570106, -287.7606776442862, -244.79878016444496, -278.13833711494055, -306.5244451324483, -294.9255042027479, -275.90134690340443, -265.822057415149]
meta_std =  [147.71888857471905, 175.5345762966199, 168.45162860360475, 131.39012944194258, 85.41128004439615, 97.7960981448435, 113.99162

2.3161596525180337
1.5023781566004923
2.497580216784272
1.3370485575976923
-------------------------------------
| time/              |              |
|    episodes        | 44           |
|    fps             | 18           |
|    time_elapsed    | 478          |
|    total timesteps | 8844         |
| train/             |              |
|    KL_loss         | 0.019604176  |
|    actor_loss      | 146          |
|    avg. z          | -0.011865241 |
|    avg. z var      | 0.8975522    |
|    critic_loss     | 18.9         |
|    ent_coef        | 0.0498       |
|    ent_coef_loss   | -0.608       |
|    learning_rate   | 0.0003       |
|    n_updates       | 208843       |
-------------------------------------
2.5612588734635295
2.2957998435266824
1.9500562802014532
1.434034058577684
--------------------------------------
| time/              |               |
|    episodes        | 48            |
|    fps             | 18            |
|    time_elapsed    | 525           |
|    tota

2.140716858188468
2.9974279142616265
1.4598078430204962
0.29838709298800525
------------------------------------
| time/              |             |
|    episodes        | 92          |
|    fps             | 18          |
|    time_elapsed    | 1010        |
|    total timesteps | 18492       |
| train/             |             |
|    KL_loss         | 0.013358492 |
|    actor_loss      | 143         |
|    avg. z          | 0.014876327 |
|    avg. z var      | 0.9252655   |
|    critic_loss     | 10.5        |
|    ent_coef        | 0.0497      |
|    ent_coef_loss   | 1.52        |
|    learning_rate   | 0.0003      |
|    n_updates       | 218491      |
------------------------------------
0.1865307795190163
1.6009498836679357
0.19840175029874974
0.751597053586005
--------------------------------------
| time/              |               |
|    episodes        | 96            |
|    fps             | 18            |
|    time_elapsed    | 1052          |
|    total timesteps | 1

2.5324363033760937
2.933481525044074
1.919375198694873
0.6140953285540469
-------------------------------------
| time/              |              |
|    episodes        | 36           |
|    fps             | 17           |
|    time_elapsed    | 406          |
|    total timesteps | 7236         |
| train/             |              |
|    KL_loss         | 0.008240517  |
|    actor_loss      | 144          |
|    avg. z          | -0.011601334 |
|    avg. z var      | 0.9244215    |
|    critic_loss     | 115          |
|    ent_coef        | 0.0519       |
|    ent_coef_loss   | 1.65         |
|    learning_rate   | 0.0003       |
|    n_updates       | 227235       |
-------------------------------------
0.631956661914585
0.7711277282278768
1.0815715553271859
0.665668170650351
--------------------------------------
| time/              |               |
|    episodes        | 40            |
|    fps             | 17            |
|    time_elapsed    | 449           |
|    total 

1.5773643452264476
0.9317913043970276
0.9448615179108186
0.31641436788390775
-------------------------------------
| time/              |              |
|    episodes        | 84           |
|    fps             | 17           |
|    time_elapsed    | 952          |
|    total timesteps | 16884        |
| train/             |              |
|    KL_loss         | 0.089463666  |
|    actor_loss      | 142          |
|    avg. z          | -0.023863753 |
|    avg. z var      | 0.8115385    |
|    critic_loss     | 180          |
|    ent_coef        | 0.051        |
|    ent_coef_loss   | 0.202        |
|    learning_rate   | 0.0003       |
|    n_updates       | 236883       |
-------------------------------------
1.249611438179774
0.3814323905108221
0.7698069083617192
2.9833700128194485
------------------------------------
| time/              |             |
|    episodes        | 88          |
|    fps             | 17          |
|    time_elapsed    | 997         |
|    total timest

0.4462120126024073
2.3873764831135436
2.744445212359908
1.3551224542237614
------------------------------------
| time/              |             |
|    episodes        | 28          |
|    fps             | 18          |
|    time_elapsed    | 312         |
|    total timesteps | 5628        |
| train/             |             |
|    KL_loss         | 0.011695046 |
|    actor_loss      | 142         |
|    avg. z          | 0.024989773 |
|    avg. z var      | 0.9292846   |
|    critic_loss     | 11.4        |
|    ent_coef        | 0.0417      |
|    ent_coef_loss   | -0.223      |
|    learning_rate   | 0.0003      |
|    n_updates       | 245627      |
------------------------------------
2.744231795835281
2.540730498855363
1.8629567378509009
1.6994882961276994
-------------------------------------
| time/              |              |
|    episodes        | 32           |
|    fps             | 18           |
|    time_elapsed    | 356          |
|    total timesteps | 6432     

1.9563160127069232
0.4420547036070155
0.2571518833002491
2.3582265229168002
--------------------------------------
| time/              |               |
|    episodes        | 76            |
|    fps             | 17            |
|    time_elapsed    | 850           |
|    total timesteps | 15276         |
| train/             |               |
|    KL_loss         | 0.017475046   |
|    actor_loss      | 140           |
|    avg. z          | -0.0010332078 |
|    avg. z var      | 0.8963922     |
|    critic_loss     | 90.1          |
|    ent_coef        | 0.0446        |
|    ent_coef_loss   | -3.65         |
|    learning_rate   | 0.0003        |
|    n_updates       | 255275        |
--------------------------------------
0.42033389356844086
1.1495520564361454
2.613063527921729
0.23271593000778656
-------------------------------------
| time/              |              |
|    episodes        | 80           |
|    fps             | 17           |
|    time_elapsed    | 896      

2.848680789999345
1.5180484544980581
1.4506864445800602
1.9168695011042751
------------------------------------
| time/              |             |
|    episodes        | 20          |
|    fps             | 17          |
|    time_elapsed    | 235         |
|    total timesteps | 4020        |
| train/             |             |
|    KL_loss         | 0.020650018 |
|    actor_loss      | 140         |
|    avg. z          | -0.04707976 |
|    avg. z var      | 0.9068204   |
|    critic_loss     | 8.74        |
|    ent_coef        | 0.0419      |
|    ent_coef_loss   | 3.87        |
|    learning_rate   | 0.0003      |
|    n_updates       | 264019      |
------------------------------------
0.26115039966735376
0.056753017897715474
0.6192738795639297
1.5756921264022279
-------------------------------------
| time/              |              |
|    episodes        | 24           |
|    fps             | 17           |
|    time_elapsed    | 280          |
|    total timesteps | 4824

0.39969849105605915
0.01987255336791871
0.30408428070888904
0.9238285922938614
-------------------------------------
| time/              |              |
|    episodes        | 68           |
|    fps             | 17           |
|    time_elapsed    | 780          |
|    total timesteps | 13668        |
| train/             |              |
|    KL_loss         | 0.008196486  |
|    actor_loss      | 139          |
|    avg. z          | -0.029597204 |
|    avg. z var      | 0.9520276    |
|    critic_loss     | 159          |
|    ent_coef        | 0.0409       |
|    ent_coef_loss   | -1.81        |
|    learning_rate   | 0.0003       |
|    n_updates       | 273667       |
-------------------------------------
1.7802517253396122
0.5829306527987836
2.0948635320301694
2.418572441537517
------------------------------------
| time/              |             |
|    episodes        | 72          |
|    fps             | 17          |
|    time_elapsed    | 825         |
|    total time

2.9469251466920996
2.0479403336266393
2.5580488415101725
1.3346285167725296
--------------------------------------
| time/              |               |
|    episodes        | 12            |
|    fps             | 17            |
|    time_elapsed    | 134           |
|    total timesteps | 2412          |
| train/             |               |
|    KL_loss         | 0.010674361   |
|    actor_loss      | 139           |
|    avg. z          | 0.00016345046 |
|    avg. z var      | 0.91089857    |
|    critic_loss     | 42.1          |
|    ent_coef        | 0.0421        |
|    ent_coef_loss   | -3.04         |
|    learning_rate   | 0.0003        |
|    n_updates       | 282411        |
--------------------------------------
1.4463559446120655
0.1257902023424765
1.2118476037120793
2.1823203354707017
--------------------------------------
| time/              |               |
|    episodes        | 16            |
|    fps             | 17            |
|    time_elapsed    | 181   

0.4535795257375729
0.09244041721041696
0.00695189159106302
0.882785762403808
--------------------------------------
| time/              |               |
|    episodes        | 60            |
|    fps             | 17            |
|    time_elapsed    | 682           |
|    total timesteps | 12060         |
| train/             |               |
|    KL_loss         | 0.004211381   |
|    actor_loss      | 139           |
|    avg. z          | -0.0056940764 |
|    avg. z var      | 0.95423615    |
|    critic_loss     | 6.55          |
|    ent_coef        | 0.049         |
|    ent_coef_loss   | -2.53         |
|    learning_rate   | 0.0003        |
|    n_updates       | 292059        |
--------------------------------------
0.24535865696879378
2.7633901225968636
0.19085177994238045
2.104301504859203
------------------------------------
| time/              |             |
|    episodes        | 64          |
|    fps             | 17          |
|    time_elapsed    | 725         

1.5609962617870003
2.5942792072370793
1.6980157688155986
2.0214530367589383
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 17           |
|    time_elapsed    | 45           |
|    total timesteps | 804          |
| train/             |              |
|    KL_loss         | 0.015657384  |
|    actor_loss      | 134          |
|    avg. z          | -0.023797546 |
|    avg. z var      | 0.9006368    |
|    critic_loss     | 136          |
|    ent_coef        | 0.0433       |
|    ent_coef_loss   | -1.12        |
|    learning_rate   | 0.0003       |
|    n_updates       | 300803       |
-------------------------------------
2.4197100154810887
1.4241450390313553
1.2749183474131973
0.5638216175171056
------------------------------------
| time/              |             |
|    episodes        | 8           |
|    fps             | 17          |
|    time_elapsed    | 90          |
|    total timest

0.7379823696719369
2.0594855770317197
1.8783075366765294
0.06749692402666618
------------------------------------
| time/              |             |
|    episodes        | 52          |
|    fps             | 17          |
|    time_elapsed    | 593         |
|    total timesteps | 10452       |
| train/             |             |
|    KL_loss         | 0.0263777   |
|    actor_loss      | 135         |
|    avg. z          | -0.00412009 |
|    avg. z var      | 0.8714787   |
|    critic_loss     | 39.7        |
|    ent_coef        | 0.0426      |
|    ent_coef_loss   | -0.941      |
|    learning_rate   | 0.0003      |
|    n_updates       | 310451      |
------------------------------------
1.7290238119017725
2.896898157698125
1.0345461691521556
2.4888229358284364
------------------------------------
| time/              |             |
|    episodes        | 56          |
|    fps             | 17          |
|    time_elapsed    | 639         |
|    total timesteps | 11256      

0.48847165249695856
1.4674668938394637
1.6328937232872565
0.5395025326053613
0.14605942287490326
1.9809859015495141
0.4020415600765306
1.953997224957435
2.6726887815431963
0.9292142028411015
0.4589621668574696
0.3906152626114413
2.7721602942913446
2.1142686610360637
2.8573471744693864
0.22083696789196028
0.4405169710518996
1.0582676098044215
1.4921375082601198
1.7415885047179205
1.4602989549620107
2.96638512175521
2.3017503811977256
2.4424704679461615
0.7650566274704468
1.2564916150147876
2.0317624679225545
2.841253083437331
1.033520828437815
2.756994903222424
2.6575442285849498
0.05811220418695362
0.6687165942621687
epoch: 15
meta_reward =  [-318.68103072624336, -326.54882258410385, -304.515222570106, -287.7606776442862, -244.79878016444496, -278.13833711494055, -306.5244451324483, -294.9255042027479, -275.90134690340443, -265.822057415149, -277.37067222252296, -306.931355699618, -286.0593583835666, -272.30829256723484, -275.8520396258373, -276.67852904821626]
meta_std =  [147.7188885

2.6637615850244503
2.1758333498249356
0.5789319553836437
2.682054269157899
------------------------------------
| time/              |             |
|    episodes        | 44          |
|    fps             | 17          |
|    time_elapsed    | 494         |
|    total timesteps | 8844        |
| train/             |             |
|    KL_loss         | 0.023022287 |
|    actor_loss      | 134         |
|    avg. z          | -0.0484744  |
|    avg. z var      | 0.92344224  |
|    critic_loss     | 51          |
|    ent_coef        | 0.0412      |
|    ent_coef_loss   | -2.41       |
|    learning_rate   | 0.0003      |
|    n_updates       | 328843      |
------------------------------------
2.811505278029719
0.7274810854739138
2.803334039554287
1.2965906951238146
------------------------------------
| time/              |             |
|    episodes        | 48          |
|    fps             | 17          |
|    time_elapsed    | 539         |
|    total timesteps | 9648        |


1.768922738605505
2.1652076516957015
2.638436727909758
2.472690543684128
------------------------------------
| time/              |             |
|    episodes        | 92          |
|    fps             | 17          |
|    time_elapsed    | 1065        |
|    total timesteps | 18492       |
| train/             |             |
|    KL_loss         | 0.015438799 |
|    actor_loss      | 133         |
|    avg. z          | 0.01102419  |
|    avg. z var      | 0.8976704   |
|    critic_loss     | 5.89        |
|    ent_coef        | 0.0471      |
|    ent_coef_loss   | 0.278       |
|    learning_rate   | 0.0003      |
|    n_updates       | 338491      |
------------------------------------
0.7414432093334822
2.4662821519420786
1.761421777129958
0.6116604342640634
-------------------------------------
| time/              |              |
|    episodes        | 96           |
|    fps             | 17           |
|    time_elapsed    | 1118         |
|    total timesteps | 19296     

1.24176398771805
1.7631520403188299
2.5174345857757268
2.9648831645299807
------------------------------------
| time/              |             |
|    episodes        | 36          |
|    fps             | 11          |
|    time_elapsed    | 614         |
|    total timesteps | 7236        |
| train/             |             |
|    KL_loss         | 0.01999196  |
|    actor_loss      | 132         |
|    avg. z          | 0.010071755 |
|    avg. z var      | 0.8835379   |
|    critic_loss     | 4.85        |
|    ent_coef        | 0.0475      |
|    ent_coef_loss   | 0.141       |
|    learning_rate   | 0.0003      |
|    n_updates       | 347235      |
------------------------------------
1.9692692406518222
1.011636448357035
2.8622252650933357
0.38644111855148044
-------------------------------------
| time/              |              |
|    episodes        | 40           |
|    fps             | 11           |
|    time_elapsed    | 672          |
|    total timesteps | 8040    

1.7701410748573219
2.089826093087569
0.5101223154468985
2.2731277646729144
------------------------------------
| time/              |             |
|    episodes        | 84          |
|    fps             | 12          |
|    time_elapsed    | 1328        |
|    total timesteps | 16884       |
| train/             |             |
|    KL_loss         | 0.010207169 |
|    actor_loss      | 131         |
|    avg. z          | 0.008964892 |
|    avg. z var      | 0.92207956  |
|    critic_loss     | 16.9        |
|    ent_coef        | 0.0361      |
|    ent_coef_loss   | 1.31        |
|    learning_rate   | 0.0003      |
|    n_updates       | 356883      |
------------------------------------
2.292091232152462
2.547949517611066
0.8429151753128041
2.0576802060256196
-------------------------------------
| time/              |              |
|    episodes        | 88           |
|    fps             | 12           |
|    time_elapsed    | 1394         |
|    total timesteps | 17688    

0.19300451862741197
0.17790304276035807
1.6018959648637514
1.4740633626528918
-------------------------------------
| time/              |              |
|    episodes        | 28           |
|    fps             | 8            |
|    time_elapsed    | 695          |
|    total timesteps | 5628         |
| train/             |              |
|    KL_loss         | 0.0134638995 |
|    actor_loss      | 129          |
|    avg. z          | 0.02245501   |
|    avg. z var      | 0.92411196   |
|    critic_loss     | 74.8         |
|    ent_coef        | 0.039        |
|    ent_coef_loss   | 0.475        |
|    learning_rate   | 0.0003       |
|    n_updates       | 365627       |
-------------------------------------
2.870418176249564
2.8785609482886336
2.715951950403687
0.5013726351236522
--------------------------------------
| time/              |               |
|    episodes        | 32            |
|    fps             | 8             |
|    time_elapsed    | 757           |
|    to

0.6526773516341123
1.472358711727487
2.049332271403191
0.7202847329524078
------------------------------------
| time/              |             |
|    episodes        | 76          |
|    fps             | 9           |
|    time_elapsed    | 1540        |
|    total timesteps | 15276       |
| train/             |             |
|    KL_loss         | 0.017684855 |
|    actor_loss      | 130         |
|    avg. z          | 0.01683266  |
|    avg. z var      | 0.93631727  |
|    critic_loss     | 108         |
|    ent_coef        | 0.0405      |
|    ent_coef_loss   | 2.13        |
|    learning_rate   | 0.0003      |
|    n_updates       | 375275      |
------------------------------------
0.7056952583717955
1.2045416477883197
1.4908328819907317
1.0271515407573275
-------------------------------------
| time/              |              |
|    episodes        | 80           |
|    fps             | 10           |
|    time_elapsed    | 1600         |
|    total timesteps | 16080   

0.7691124534552952
1.8675458222450287
0.9834797301000711
2.5852163297826483
------------------------------------
| time/              |             |
|    episodes        | 20          |
|    fps             | 16          |
|    time_elapsed    | 246         |
|    total timesteps | 4020        |
| train/             |             |
|    KL_loss         | 0.008264046 |
|    actor_loss      | 129         |
|    avg. z          | 0.025546798 |
|    avg. z var      | 0.940263    |
|    critic_loss     | 6.34        |
|    ent_coef        | 0.0411      |
|    ent_coef_loss   | -2.66       |
|    learning_rate   | 0.0003      |
|    n_updates       | 384019      |
------------------------------------
0.9142048354762513
1.3989233416131401
2.697086877765778
1.4461348471137776
--------------------------------------
| time/              |               |
|    episodes        | 24            |
|    fps             | 16            |
|    time_elapsed    | 293           |
|    total timesteps | 48

2.8201592386329457
1.7649684585311363
2.0165014065286577
0.4862543352139882
-------------------------------------
| time/              |              |
|    episodes        | 68           |
|    fps             | 17           |
|    time_elapsed    | 794          |
|    total timesteps | 13668        |
| train/             |              |
|    KL_loss         | 0.009044539  |
|    actor_loss      | 130          |
|    avg. z          | -0.020521522 |
|    avg. z var      | 0.9449832    |
|    critic_loss     | 61.5         |
|    ent_coef        | 0.0428       |
|    ent_coef_loss   | -2.27        |
|    learning_rate   | 0.0003       |
|    n_updates       | 393667       |
-------------------------------------
0.7182134351072635
0.4207769273935009
2.2788601508753565
1.4678375261193577
-------------------------------------
| time/              |              |
|    episodes        | 72           |
|    fps             | 17           |
|    time_elapsed    | 839          |
|    total t

In [None]:
# FIXME(review): incomplete assignment - as written this cell raises SyntaxError.
# Finish the right-hand side or delete the cell so Restart & Run All succeeds.
a =

In [None]:
# imports
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib.dates as mdates
import numpy as np

# Plot the meta-policy reward curve with a shaded +/- one-std band.
# Expects meta_reward / meta_std and vanilla_reward / vanilla_std to have been
# assigned by earlier cells (they are not defined in this cell).

# Define upper and lower bounds for shaded variation (element-wise reward -/+ std)
lower_bound_meta = (np.asarray(meta_reward) - np.asarray(meta_std))
upper_bound_meta = (np.asarray(meta_reward) + np.asarray(meta_std))

# Debug aid: show the lower edge of the band.
print(lower_bound_meta)

# Bounds for the vanilla baseline; only consumed by the commented-out plot calls below.
lower_bound_vanilla = (np.asarray(vanilla_reward) - np.asarray(vanilla_std))
upper_bound_vanilla = (np.asarray(vanilla_reward) + np.asarray(vanilla_std))

# Create the figure exactly once; a second plt.subplots() call would leave an
# extra, empty figure behind that is rendered as a blank plot.
fig, ax = plt.subplots(1)
ax.set_facecolor('white')

# x positions: index of each meta_reward entry scaled by 10000.
# NOTE(review): presumably 10000 environment steps per entry - confirm, then add an x-axis label.
x = [i*10000 for i in range(len(meta_reward))]

ax.plot(x, meta_reward, lw=0.9, color = 'blue')
#ax.plot(x, vanilla_reward, lw=0.9, color = 'red')

# Variation and shaded area
ax.fill_between(x, lower_bound_meta, upper_bound_meta, facecolor='blue', alpha=0.3)
#ax.fill_between(x, lower_bound_vanilla, upper_bound_vanilla, facecolor='red', alpha=0.3)
ax.set_ylabel('meta_reward')

# plt.show() instead of fig.show(): Figure.show() requires a GUI backend and
# emits a warning under non-GUI backends such as a notebook's inline backend.
plt.show()

In [None]:
# Inspect the x-axis positions (entry index * 10000) built in the plotting cell.
x

In [None]:
# Inspect the vanilla baseline rewards.
# NOTE(review): vanilla_reward is not assigned in the visible part of the notebook -
# confirm an earlier cell defines it, otherwise this raises NameError on a fresh kernel.
vanilla_reward

In [None]:
# Build the environment and an mSAC agent, score the freshly constructed agent,
# train it for a short run, score it again and print both results.
env = FooEnv()  # alternative environment: gym.make('LunarLanderContinuous-v2')

# Keyword arguments forwarded to the mSAC policy.
# Alternatives kept for reference: SAC or PPO with
# policy_kwargs=dict(net_arch=[300, 300, 300]) and optionally learning_rate=0.0006.
msac_policy_kwargs = {
    'net_arch': [300, 300, 300],
    'latent_dim': 7,
    'hidden_sizes': [300, 300, 300],
}
model = mSAC('MlpPolicy', env, verbose=1, policy_kwargs=msac_policy_kwargs)

print('-Start-')
n_eval = 1  # episodes passed as n_eval_episodes to each evaluate_meta_policy call

# Evaluation before any call to model.learn().
mean_reward_before, std_reward_before = evaluate_meta_policy(
    model, env, n_eval_episodes=n_eval
)

print('Start Learning')
model.learn(total_timesteps=2500)  # optional extras once tried: eval_freq=100, n_eval_episodes=5

# Evaluation after training.
mean_reward, std_reward = evaluate_meta_policy(
    model, env, n_eval_episodes=n_eval
)

print(f"mean_reward, before training:{mean_reward_before:.2f} +/- {std_reward_before:.2f}")
print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}")