In [1]:
import numpy as np
import torch
from tsGaussian.torch_tsgaussian import TangentSpaceGaussian
from stable_baselines_utils import TangentSpaceGaussian as TSG
# from pytorch3d.transforms.so3 import (
#     so3_exp_map,
#     so3_relative_angle,
# )

In [2]:
# Distribution object exercised by the cells below; constructed with None as
# its only argument.
tg = TangentSpaceGaussian(None)

# Test liegroup torch

In [3]:
from liegroups.torch import SO3

In [4]:
# Apply liegroups' SO3.exp to a (2, 3) batch: one non-zero vector and one zero
# vector. In the displayed result the zero vector comes back as the identity.
C = SO3.exp(torch.Tensor([[1,2,3],
                         [0,0,0]]))
C

<liegroups.torch.so3.SO3Matrix>
| tensor([[[-0.6949,  0.7135,  0.0893],
|          [-0.1920, -0.3038,  0.9332],
|          [ 0.6930,  0.6313,  0.3481]],
| 
|         [[ 1.0000,  0.0000,  0.0000],
|          [ 0.0000,  1.0000,  0.0000],
|          [ 0.0000,  0.0000,  1.0000]]])

# Test torch_tsgaussian sample

In [5]:
# Inputs for rsample: an identity matrix and an all-ones sigma, each carrying a
# leading batch dimension of 1.
R_mu = torch.eye(3).unsqueeze(0)
sigma = torch.ones(1, 3)

In [6]:
# Draw one sample for the given R_mu and sigma. rsample returns a pair --
# presumably a quaternion and the matching rotation matrix, judging by the
# names; TODO confirm against the implementation.
# NOTE(review): this cell prints nothing itself, so the "sigma:" and torch.Size
# lines in its output presumably come from debug prints inside rsample.
R_quat, R_x = tg.rsample(R_mu, sigma)

sigma:  tensor([[1., 1., 1.]])
torch.Size([1, 3, 3])


In [7]:
# Orthogonality check on the sampled matrix: R_x^T R_x should display as the
# identity up to floating-point rounding.
R_x.transpose(1, 2).bmm(R_x)

tensor([[[ 1.0000e+00,  0.0000e+00, -4.4703e-08],
         [ 0.0000e+00,  1.0000e+00,  5.9605e-08],
         [-4.4703e-08,  5.9605e-08,  1.0000e+00]]])

# Test torch_tsgaussian normal_term

In [8]:
# All-ones sigma with a leading batch dimension of 1, echoed for inspection.
sigma = torch.ones(1, 3)
sigma

tensor([[1., 1., 1.]])

In [9]:
# Normalisation term for the all-ones sigma. The displayed 15.7496 equals
# (2*pi)**1.5, the normalising constant of a 3-D unit-variance Gaussian.
tg.normal_term(sigma)

tensor([15.7496])

# Test torch_tsgaussian log_map

In [10]:
# Two identical identity matrices, each with a leading batch dimension of 1,
# used as the inputs to log_map.
R_1 = torch.eye(3).unsqueeze(0)
R_2 = torch.eye(3).unsqueeze(0)

In [11]:
# Log map between two identical matrices; the displayed result is the zero
# vector.
# NOTE(review): the displayed result has shape (3,) although both inputs are
# (1, 3, 3) -- check whether log_map drops the batch dimension.
tg.log_map(R_1, R_2)

tensor([0., 0., 0.])

# Test torch_tsgaussian log_probs

In [12]:
# Inputs for log_probs: a batch of five identical (sample, mean) pairs.
# The mean has to be a valid rotation matrix, so it is the identity here; an
# all-zeros 3x3 matrix is not orthonormal and is not a rotation. With
# R_x == R_mu the sample sits exactly at the mean, which is the case the
# exp(-2.7568) check in the following cell corresponds to.
R_x = torch.eye(3).reshape((1, 3, 3)).repeat(5, 1, 1)
R_mu = torch.eye(3).reshape((1, 3, 3)).repeat(5, 1, 1)
# sigma keeps a batch dimension of 1 (it is not repeated to 5).
# NOTE(review): confirm log_probs broadcasts it once the batched version exists.
sigma = torch.ones(3).reshape((1, 3))

In [13]:
# tg.log_probs(R_x, R_mu, sigma)

In [14]:
# -2.7568 is approximately -1.5 * ln(2*pi): the log-density of a 3-D
# unit-variance Gaussian evaluated at its mean. Exponentiating gives ~0.0635,
# the reciprocal of the 15.7496 displayed for tg.normal_term(sigma) earlier.
# Presumably this is the reference value expected from log_probs when the
# sample equals the mean -- TODO confirm.
np.e ** (-2.7568)

0.06349462641817973

All of the torch_tsgaussian code runs now; its correctness still needs to be checked, and it needs to be turned into a batched version.

# Test TangentSpaceGaussian actions_from_params

In [15]:
# Wrapper class from stable_baselines_utils (imported as TSG), constructed
# with None as its only argument.
tsg = TSG(None)

In [16]:
# Show which object backs the wrapper's `distribution` attribute.
print(tsg.distribution)

<tsGaussian.torch_tsgaussian.TangentSpaceGaussian object at 0x7f8b137266d0>


In [17]:
tsg

<stable_baselines_utils.TangentSpaceGaussian at 0x7f8b13726f70>

In [18]:
# Call actions_from_params with a (1, 3, 3) identity matrix and a (1, 3)
# all-ones tensor. The displayed result is a tuple of a (1, 4) tensor and a
# (1, 3, 3) tensor -- presumably the quaternion and rotation-matrix forms of
# the same sample; TODO confirm.
tsg.actions_from_params(torch.eye(3).reshape((1,3,3)), torch.ones(3).reshape((1,3)))

sigma:  tensor([[1., 1., 1.]])
torch.Size([1, 3, 3])


(tensor([[0.5141, 0.2487, 0.0094, 0.8208]]),
 tensor([[[ 0.8762,  0.2403,  0.4179],
          [ 0.2711,  0.4712, -0.8393],
          [-0.3986,  0.8486,  0.3478]]]))

# Test TangentSpaceGaussian log_prob_from_params

In [19]:
# Shape of a batch of two identity matrices, as used in the commented-out
# log_prob_from_params call below.
torch.eye(3).repeat(2, 1, 1).shape

torch.Size([2, 3, 3])

In [20]:
# Shape of an all-ones vector of length 3 repeated into a batch of two.
torch.ones(3).repeat(2, 1).shape

torch.Size([2, 3])

In [21]:
# tsg.log_prob_from_params(torch.eye(3).repeat(2,1,1), torch.ones(3))

In [22]:
# Check that torch.linalg.inv accepts a batch of matrices: x is (2, 4, 4) and
# the displayed inverse has the same shape.
# NOTE: x is drawn without a fixed seed, so the displayed numbers change from
# run to run.
x = torch.randn(2, 4, 4)
y = torch.linalg.inv(x)
y

tensor([[[ 1.5790e-01,  2.1328e-01,  3.6575e-01, -4.2418e-01],
         [ 2.4405e-01,  1.1619e-03,  3.0531e-01,  2.8206e-01],
         [ 3.1882e-01, -3.9534e-01, -1.1444e-01, -4.3760e-02],
         [ 1.3187e-01,  7.9257e-01, -2.2410e-01, -2.8045e-02]],

        [[-1.4559e+00,  3.3509e+00,  6.4862e-01, -2.1494e+00],
         [ 8.8416e-01,  1.2517e+00,  1.7419e-01, -9.0476e-02],
         [ 1.7910e+00, -2.0875e+00, -9.7189e-01,  1.3043e+00],
         [-1.0137e+00,  2.0690e+00,  2.2690e-01, -9.3167e-01]]])

Again, the code runs, but its correctness still needs to be checked.

# Try to run training

In [23]:
import torch
from absl import app, flags
from stable_baselines3 import SAC, PPO
from envs.wahba import Wahba
from stable_baselines_utils import CustomSACPolicy, \
    CustomCNN

In [24]:
def main(argv):
    """Run a short SAC training session on the Wahba environment, on CPU.

    Builds a ``Wahba`` environment and a ``SAC`` model that uses
    ``CustomSACPolicy`` with a ``CustomCNN`` features extractor (256 features,
    a single critic, extractor not shared), then trains it for 500 timesteps.

    Args:
        argv: Not used by the body. Presumably kept so the function matches
            an absl-style ``main(argv)`` entry point -- TODO confirm.

    Returns:
        None. The trained model is not returned.
    """
    wahba_env = Wahba()
    cpu_device = torch.device('cpu')

    # Everything the policy needs, spelled out in a single literal.
    sac_policy_options = {
        'features_extractor_class': CustomCNN,
        'features_extractor_kwargs': {'features_dim': 256},
        'n_critics': 1,
        'share_features_extractor': False,
    }

    agent = SAC(
        CustomSACPolicy,
        wahba_env,
        verbose = 1,
        ent_coef = 'auto_0.1',
        policy_kwargs = sac_policy_options,
        device = cpu_device,
    )

    # NOTE(review): no eval_env is passed, so confirm that eval_freq and
    # n_eval_episodes have any effect (and are still accepted) in the
    # installed stable-baselines3 version.
    agent.learn(total_timesteps = 500, eval_freq = 100, n_eval_episodes = 100)

In [25]:
from torch import autograd
# Run the training smoke test inside autograd's anomaly-detection context.
# NOTE(review): anomaly detection is a debugging aid and is expected to slow
# the backward pass down; confirm it is still wanted before longer runs.
with autograd.detect_anomaly():
    main(None)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -7.3     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 219      |
|    time_elapsed    | 0        |
|    total_timesteps | 4        |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -7.4     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 334      |
|    time_elapsed    | 0        |
|    total_timesteps | 8        |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.76    |
| time/              |          |
|    episodes        | 12       |
|    fps             |

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.2     |
| time/              |          |
|    episodes        | 100      |
|    fps             | 792      |
|    time_elapsed    | 0        |
|    total_timesteps | 100      |
---------------------------------


  with autograd.detect_anomaly():
[W NNPACK.cpp:79] Could not initialize NNPACK! Reason: Unsupported hardware.


tensor([[ 0.0282,  0.0451,  0.0034, -0.0394, -0.0057,  0.0364,  0.0276,  0.0125,
         -0.0045,  0.0236, -0.0181,  0.0141]])
sigma:  tensor([[0.0282, 0.0451, 0.0034]])
torch.Size([1, 3, 3])
tensor([[ 0.0285,  0.0453,  0.0035,  ...,  0.0233, -0.0180,  0.0146],
        [ 0.0281,  0.0455,  0.0038,  ...,  0.0237, -0.0182,  0.0144],
        [ 0.0277,  0.0452,  0.0037,  ...,  0.0241, -0.0181,  0.0141],
        ...,
        [ 0.0289,  0.0451,  0.0032,  ...,  0.0234, -0.0181,  0.0143],
        [ 0.0282,  0.0453,  0.0040,  ...,  0.0238, -0.0180,  0.0146],
        [ 0.0282,  0.0455,  0.0032,  ...,  0.0233, -0.0176,  0.0145]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.0285, 0.0453, 0.0035],
        [0.0281, 0.0455, 0.0038],
        [0.0277, 0.0452, 0.0037],
        [0.0284, 0.0454, 0.0036],
        [0.0282, 0.0451, 0.0037],
        [0.0283, 0.0453, 0.0037],
        [0.0286, 0.0451, 0.0032],
        [0.0282, 0.0454, 0.0039],
        [0.0288, 0.0454, 0.0034],
        [0.0286, 0.0454, 0

tensor([[ 0.0285,  0.0453,  0.0035,  ...,  0.0233, -0.0180,  0.0146],
        [ 0.0281,  0.0455,  0.0038,  ...,  0.0237, -0.0182,  0.0144],
        [ 0.0277,  0.0452,  0.0037,  ...,  0.0241, -0.0181,  0.0141],
        ...,
        [ 0.0289,  0.0451,  0.0032,  ...,  0.0234, -0.0181,  0.0143],
        [ 0.0282,  0.0453,  0.0040,  ...,  0.0238, -0.0180,  0.0146],
        [ 0.0282,  0.0455,  0.0032,  ...,  0.0233, -0.0176,  0.0145]])
sigma:  tensor([[0.0285, 0.0453, 0.0035],
        [0.0281, 0.0455, 0.0038],
        [0.0277, 0.0452, 0.0037],
        [0.0284, 0.0454, 0.0036],
        [0.0282, 0.0451, 0.0037],
        [0.0283, 0.0453, 0.0037],
        [0.0286, 0.0451, 0.0032],
        [0.0282, 0.0454, 0.0039],
        [0.0288, 0.0454, 0.0034],
        [0.0286, 0.0454, 0.0035],
        [0.0281, 0.0453, 0.0040],
        [0.0282, 0.0453, 0.0040],
        [0.0277, 0.0456, 0.0036],
        [0.0280, 0.0452, 0.0040],
        [0.0283, 0.0455, 0.0035],
        [0.0282, 0.0455, 0.0040],
        [0.028

tensor([[ 0.0297,  0.0484,  0.0282, -0.0411, -0.0060,  0.0368,  0.0244,  0.0128,
         -0.0025,  0.0255, -0.0269,  0.0144]])
sigma:  tensor([[0.0297, 0.0484, 0.0282]])
torch.Size([1, 3, 3])
tensor([[ 0.0299,  0.0486,  0.0273,  ...,  0.0253, -0.0258,  0.0148],
        [ 0.0293,  0.0485,  0.0274,  ...,  0.0253, -0.0262,  0.0146],
        [ 0.0293,  0.0486,  0.0276,  ...,  0.0254, -0.0263,  0.0146],
        ...,
        [ 0.0298,  0.0484,  0.0281,  ...,  0.0254, -0.0270,  0.0144],
        [ 0.0297,  0.0485,  0.0273,  ...,  0.0252, -0.0261,  0.0148],
        [ 0.0299,  0.0484,  0.0272,  ...,  0.0254, -0.0265,  0.0143]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.0299, 0.0486, 0.0273],
        [0.0293, 0.0485, 0.0274],
        [0.0293, 0.0486, 0.0276],
        [0.0298, 0.0484, 0.0274],
        [0.0293, 0.0485, 0.0277],
        [0.0294, 0.0486, 0.0276],
        [0.0295, 0.0485, 0.0282],
        [0.0300, 0.0485, 0.0276],
        [0.0295, 0.0484, 0.0282],
        [0.0296, 0.0483, 0

tensor([[ 0.0299,  0.0486,  0.0273,  ...,  0.0253, -0.0258,  0.0148],
        [ 0.0293,  0.0485,  0.0274,  ...,  0.0253, -0.0262,  0.0146],
        [ 0.0293,  0.0486,  0.0276,  ...,  0.0254, -0.0263,  0.0146],
        ...,
        [ 0.0298,  0.0484,  0.0281,  ...,  0.0254, -0.0270,  0.0144],
        [ 0.0297,  0.0485,  0.0273,  ...,  0.0252, -0.0261,  0.0148],
        [ 0.0299,  0.0484,  0.0272,  ...,  0.0254, -0.0265,  0.0143]])
sigma:  tensor([[0.0299, 0.0486, 0.0273],
        [0.0293, 0.0485, 0.0274],
        [0.0293, 0.0486, 0.0276],
        [0.0298, 0.0484, 0.0274],
        [0.0293, 0.0485, 0.0277],
        [0.0294, 0.0486, 0.0276],
        [0.0295, 0.0485, 0.0282],
        [0.0300, 0.0485, 0.0276],
        [0.0295, 0.0484, 0.0282],
        [0.0296, 0.0483, 0.0282],
        [0.0299, 0.0485, 0.0277],
        [0.0299, 0.0486, 0.0273],
        [0.0293, 0.0485, 0.0273],
        [0.0294, 0.0485, 0.0275],
        [0.0296, 0.0483, 0.0280],
        [0.0298, 0.0484, 0.0283],
        [0.029

tensor([[ 0.0352,  0.0559,  0.0456, -0.0416, -0.0056,  0.0379,  0.0211,  0.0138,
         -0.0017,  0.0266, -0.0319,  0.0151]])
sigma:  tensor([[0.0352, 0.0559, 0.0456]])
torch.Size([1, 3, 3])
tensor([[ 0.0349,  0.0557,  0.0453,  ...,  0.0269, -0.0314,  0.0149],
        [ 0.0350,  0.0560,  0.0462,  ...,  0.0271, -0.0323,  0.0149],
        [ 0.0352,  0.0558,  0.0456,  ...,  0.0271, -0.0322,  0.0149],
        ...,
        [ 0.0350,  0.0561,  0.0463,  ...,  0.0271, -0.0323,  0.0149],
        [ 0.0352,  0.0561,  0.0461,  ...,  0.0270, -0.0323,  0.0148],
        [ 0.0352,  0.0561,  0.0461,  ...,  0.0270, -0.0323,  0.0148]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.0349, 0.0557, 0.0453],
        [0.0350, 0.0560, 0.0462],
        [0.0352, 0.0558, 0.0456],
        [0.0353, 0.0554, 0.0443],
        [0.0350, 0.0559, 0.0460],
        [0.0350, 0.0560, 0.0461],
        [0.0350, 0.0559, 0.0460],
        [0.0357, 0.0556, 0.0444],
        [0.0349, 0.0557, 0.0453],
        [0.0352, 0.0559, 0

tensor([[ 0.0349,  0.0557,  0.0453,  ...,  0.0269, -0.0314,  0.0149],
        [ 0.0350,  0.0560,  0.0462,  ...,  0.0271, -0.0323,  0.0149],
        [ 0.0352,  0.0558,  0.0456,  ...,  0.0271, -0.0322,  0.0149],
        ...,
        [ 0.0350,  0.0561,  0.0463,  ...,  0.0271, -0.0323,  0.0149],
        [ 0.0352,  0.0561,  0.0461,  ...,  0.0270, -0.0323,  0.0148],
        [ 0.0352,  0.0561,  0.0461,  ...,  0.0270, -0.0323,  0.0148]])
sigma:  tensor([[0.0349, 0.0557, 0.0453],
        [0.0350, 0.0560, 0.0462],
        [0.0352, 0.0558, 0.0456],
        [0.0353, 0.0554, 0.0443],
        [0.0350, 0.0559, 0.0460],
        [0.0350, 0.0560, 0.0461],
        [0.0350, 0.0559, 0.0460],
        [0.0357, 0.0556, 0.0444],
        [0.0349, 0.0557, 0.0453],
        [0.0352, 0.0559, 0.0458],
        [0.0350, 0.0560, 0.0461],
        [0.0354, 0.0559, 0.0455],
        [0.0357, 0.0559, 0.0448],
        [0.0352, 0.0561, 0.0463],
        [0.0352, 0.0561, 0.0460],
        [0.0354, 0.0561, 0.0457],
        [0.035

tensor([[ 0.0426,  0.0644,  0.0617, -0.0435, -0.0062,  0.0384,  0.0182,  0.0144,
         -0.0008,  0.0276, -0.0361,  0.0157]])
sigma:  tensor([[0.0426, 0.0644, 0.0617]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.17    |
| time/              |          |
|    episodes        | 104      |
|    fps             | 12       |
|    time_elapsed    | 8        |
|    total_timesteps | 104      |
| train/             |          |
|    actor_loss      | 0.254    |
|    critic_loss     | 20.3     |
|    ent_coef        | 0.0999   |
|    ent_coef_loss   | -4.53    |
|    learning_rate   | 0.0003   |
|    n_updates       | 3        |
---------------------------------
tensor([[ 0.0424,  0.0642,  0.0614,  ...,  0.0276, -0.0360,  0.0159],
        [ 0.0422,  0.0642,  0.0609,  ...,  0.0282, -0.0358,  0.0156],
        [ 0.0425,  0.0640,  0.0608,  ...,  0.0275, -0.0354,  0.0158],
        ...,
      

tensor([[ 0.0424,  0.0642,  0.0614,  ...,  0.0276, -0.0360,  0.0159],
        [ 0.0422,  0.0642,  0.0609,  ...,  0.0282, -0.0358,  0.0156],
        [ 0.0425,  0.0640,  0.0608,  ...,  0.0275, -0.0354,  0.0158],
        ...,
        [ 0.0419,  0.0642,  0.0608,  ...,  0.0281, -0.0358,  0.0157],
        [ 0.0421,  0.0641,  0.0609,  ...,  0.0279, -0.0360,  0.0155],
        [ 0.0421,  0.0646,  0.0625,  ...,  0.0281, -0.0369,  0.0156]])
sigma:  tensor([[0.0424, 0.0642, 0.0614],
        [0.0422, 0.0642, 0.0609],
        [0.0425, 0.0640, 0.0608],
        [0.0419, 0.0642, 0.0607],
        [0.0422, 0.0644, 0.0625],
        [0.0419, 0.0642, 0.0612],
        [0.0427, 0.0637, 0.0603],
        [0.0418, 0.0641, 0.0607],
        [0.0421, 0.0642, 0.0619],
        [0.0420, 0.0642, 0.0609],
        [0.0419, 0.0642, 0.0612],
        [0.0426, 0.0642, 0.0615],
        [0.0422, 0.0644, 0.0624],
        [0.0426, 0.0636, 0.0606],
        [0.0425, 0.0641, 0.0605],
        [0.0422, 0.0645, 0.0623],
        [0.042

tensor([[ 0.0511,  0.0734,  0.0771, -0.0447, -0.0064,  0.0392,  0.0148,  0.0162,
         -0.0012,  0.0287, -0.0396,  0.0166]])
sigma:  tensor([[0.0511, 0.0734, 0.0771]])
torch.Size([1, 3, 3])
tensor([[ 0.0511,  0.0738,  0.0788,  ...,  0.0293, -0.0408,  0.0166],
        [ 0.0512,  0.0737,  0.0790,  ...,  0.0293, -0.0408,  0.0164],
        [ 0.0504,  0.0730,  0.0764,  ...,  0.0295, -0.0398,  0.0170],
        ...,
        [ 0.0507,  0.0736,  0.0776,  ...,  0.0291, -0.0401,  0.0168],
        [ 0.0514,  0.0727,  0.0765,  ...,  0.0289, -0.0395,  0.0166],
        [ 0.0509,  0.0727,  0.0757,  ...,  0.0288, -0.0392,  0.0162]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.0511, 0.0738, 0.0788],
        [0.0512, 0.0737, 0.0790],
        [0.0504, 0.0730, 0.0764],
        [0.0511, 0.0734, 0.0771],
        [0.0510, 0.0733, 0.0774],
        [0.0504, 0.0732, 0.0764],
        [0.0511, 0.0736, 0.0788],
        [0.0507, 0.0734, 0.0766],
        [0.0510, 0.0735, 0.0783],
        [0.0514, 0.0726, 0

tensor([[ 0.0511,  0.0738,  0.0788,  ...,  0.0293, -0.0408,  0.0166],
        [ 0.0512,  0.0737,  0.0790,  ...,  0.0293, -0.0408,  0.0164],
        [ 0.0504,  0.0730,  0.0764,  ...,  0.0295, -0.0398,  0.0170],
        ...,
        [ 0.0507,  0.0736,  0.0776,  ...,  0.0291, -0.0401,  0.0168],
        [ 0.0514,  0.0727,  0.0765,  ...,  0.0289, -0.0395,  0.0166],
        [ 0.0509,  0.0727,  0.0757,  ...,  0.0288, -0.0392,  0.0162]])
sigma:  tensor([[0.0511, 0.0738, 0.0788],
        [0.0512, 0.0737, 0.0790],
        [0.0504, 0.0730, 0.0764],
        [0.0511, 0.0734, 0.0771],
        [0.0510, 0.0733, 0.0774],
        [0.0504, 0.0732, 0.0764],
        [0.0511, 0.0736, 0.0788],
        [0.0507, 0.0734, 0.0766],
        [0.0510, 0.0735, 0.0783],
        [0.0514, 0.0726, 0.0767],
        [0.0510, 0.0732, 0.0767],
        [0.0513, 0.0736, 0.0789],
        [0.0511, 0.0734, 0.0771],
        [0.0511, 0.0736, 0.0787],
        [0.0512, 0.0731, 0.0777],
        [0.0510, 0.0734, 0.0778],
        [0.050

tensor([[ 0.0618,  0.0822,  0.0929, -0.0462, -0.0060,  0.0400,  0.0129,  0.0195,
         -0.0008,  0.0301, -0.0434,  0.0180]])
sigma:  tensor([[0.0618, 0.0822, 0.0929]])
torch.Size([1, 3, 3])
tensor([[ 0.0620,  0.0832,  0.0946,  ...,  0.0303, -0.0444,  0.0182],
        [ 0.0617,  0.0829,  0.0933,  ...,  0.0301, -0.0435,  0.0179],
        [ 0.0615,  0.0833,  0.0932,  ...,  0.0302, -0.0434,  0.0179],
        ...,
        [ 0.0623,  0.0834,  0.0944,  ...,  0.0304, -0.0440,  0.0178],
        [ 0.0624,  0.0834,  0.0954,  ...,  0.0305, -0.0447,  0.0180],
        [ 0.0625,  0.0836,  0.0956,  ...,  0.0305, -0.0447,  0.0179]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.0620, 0.0832, 0.0946],
        [0.0617, 0.0829, 0.0933],
        [0.0615, 0.0833, 0.0932],
        [0.0620, 0.0834, 0.0940],
        [0.0621, 0.0835, 0.0940],
        [0.0627, 0.0831, 0.0952],
        [0.0620, 0.0830, 0.0931],
        [0.0622, 0.0834, 0.0951],
        [0.0623, 0.0834, 0.0953],
        [0.0613, 0.0832, 0

tensor([[ 0.0620,  0.0832,  0.0946,  ...,  0.0303, -0.0444,  0.0182],
        [ 0.0617,  0.0829,  0.0933,  ...,  0.0301, -0.0435,  0.0179],
        [ 0.0615,  0.0833,  0.0932,  ...,  0.0302, -0.0434,  0.0179],
        ...,
        [ 0.0623,  0.0834,  0.0944,  ...,  0.0304, -0.0440,  0.0178],
        [ 0.0624,  0.0834,  0.0954,  ...,  0.0305, -0.0447,  0.0180],
        [ 0.0625,  0.0836,  0.0956,  ...,  0.0305, -0.0447,  0.0179]])
sigma:  tensor([[0.0620, 0.0832, 0.0946],
        [0.0617, 0.0829, 0.0933],
        [0.0615, 0.0833, 0.0932],
        [0.0620, 0.0834, 0.0940],
        [0.0621, 0.0835, 0.0940],
        [0.0627, 0.0831, 0.0952],
        [0.0620, 0.0830, 0.0931],
        [0.0622, 0.0834, 0.0951],
        [0.0623, 0.0834, 0.0953],
        [0.0613, 0.0832, 0.0930],
        [0.0623, 0.0823, 0.0932],
        [0.0628, 0.0836, 0.0950],
        [0.0622, 0.0834, 0.0953],
        [0.0623, 0.0834, 0.0953],
        [0.0621, 0.0832, 0.0935],
        [0.0619, 0.0828, 0.0923],
        [0.062

tensor([[ 0.0758,  0.0939,  0.1095, -0.0474, -0.0051,  0.0422,  0.0113,  0.0226,
          0.0007,  0.0311, -0.0465,  0.0197]])
sigma:  tensor([[0.0758, 0.0939, 0.1095]])
torch.Size([1, 3, 3])
tensor([[ 0.0755,  0.0932,  0.1099,  ...,  0.0313, -0.0470,  0.0201],
        [ 0.0768,  0.0940,  0.1112,  ...,  0.0307, -0.0468,  0.0197],
        [ 0.0748,  0.0932,  0.1092,  ...,  0.0310, -0.0465,  0.0194],
        ...,
        [ 0.0769,  0.0940,  0.1120,  ...,  0.0312, -0.0474,  0.0199],
        [ 0.0771,  0.0944,  0.1127,  ...,  0.0313, -0.0477,  0.0200],
        [ 0.0766,  0.0944,  0.1124,  ...,  0.0316, -0.0478,  0.0200]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.0755, 0.0932, 0.1099],
        [0.0768, 0.0940, 0.1112],
        [0.0748, 0.0932, 0.1092],
        [0.0751, 0.0935, 0.1086],
        [0.0766, 0.0947, 0.1125],
        [0.0771, 0.0944, 0.1127],
        [0.0760, 0.0944, 0.1113],
        [0.0744, 0.0930, 0.1082],
        [0.0757, 0.0939, 0.1099],
        [0.0756, 0.0942, 0

tensor([[ 0.0755,  0.0932,  0.1099,  ...,  0.0313, -0.0470,  0.0201],
        [ 0.0768,  0.0940,  0.1112,  ...,  0.0307, -0.0468,  0.0197],
        [ 0.0748,  0.0932,  0.1092,  ...,  0.0310, -0.0465,  0.0194],
        ...,
        [ 0.0769,  0.0940,  0.1120,  ...,  0.0312, -0.0474,  0.0199],
        [ 0.0771,  0.0944,  0.1127,  ...,  0.0313, -0.0477,  0.0200],
        [ 0.0766,  0.0944,  0.1124,  ...,  0.0316, -0.0478,  0.0200]])
sigma:  tensor([[0.0755, 0.0932, 0.1099],
        [0.0768, 0.0940, 0.1112],
        [0.0748, 0.0932, 0.1092],
        [0.0751, 0.0935, 0.1086],
        [0.0766, 0.0947, 0.1125],
        [0.0771, 0.0944, 0.1127],
        [0.0760, 0.0944, 0.1113],
        [0.0744, 0.0930, 0.1082],
        [0.0757, 0.0939, 0.1099],
        [0.0756, 0.0942, 0.1106],
        [0.0766, 0.0944, 0.1124],
        [0.0759, 0.0936, 0.1098],
        [0.0765, 0.0945, 0.1122],
        [0.0755, 0.0932, 0.1099],
        [0.0762, 0.0939, 0.1114],
        [0.0759, 0.0941, 0.1108],
        [0.075

tensor([[ 0.0946,  0.1073,  0.1321, -0.0476, -0.0037,  0.0447,  0.0102,  0.0288,
          0.0009,  0.0327, -0.0508,  0.0213]])
sigma:  tensor([[0.0946, 0.1073, 0.1321]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.14    |
| time/              |          |
|    episodes        | 108      |
|    fps             | 5        |
|    time_elapsed    | 18       |
|    total_timesteps | 108      |
| train/             |          |
|    actor_loss      | 0.354    |
|    critic_loss     | 20.1     |
|    ent_coef        | 0.0998   |
|    ent_coef_loss   | -6.65    |
|    learning_rate   | 0.0003   |
|    n_updates       | 7        |
---------------------------------
tensor([[ 0.0915,  0.1061,  0.1281,  ...,  0.0324, -0.0495,  0.0213],
        [ 0.0909,  0.1051,  0.1274,  ...,  0.0326, -0.0497,  0.0220],
        [ 0.0921,  0.1063,  0.1290,  ...,  0.0327, -0.0500,  0.0217],
        ...,
      

tensor([[ 0.0915,  0.1061,  0.1281,  ...,  0.0324, -0.0495,  0.0213],
        [ 0.0909,  0.1051,  0.1274,  ...,  0.0326, -0.0497,  0.0220],
        [ 0.0921,  0.1063,  0.1290,  ...,  0.0327, -0.0500,  0.0217],
        ...,
        [ 0.0921,  0.1064,  0.1290,  ...,  0.0327, -0.0500,  0.0215],
        [ 0.0942,  0.1071,  0.1320,  ...,  0.0328, -0.0512,  0.0217],
        [ 0.0935,  0.1069,  0.1313,  ...,  0.0329, -0.0511,  0.0218]])
sigma:  tensor([[0.0915, 0.1061, 0.1281],
        [0.0909, 0.1051, 0.1274],
        [0.0921, 0.1063, 0.1290],
        [0.0919, 0.1066, 0.1284],
        [0.0939, 0.1071, 0.1318],
        [0.0930, 0.1069, 0.1306],
        [0.0915, 0.1062, 0.1282],
        [0.0913, 0.1055, 0.1278],
        [0.0930, 0.1070, 0.1294],
        [0.0906, 0.1052, 0.1266],
        [0.0937, 0.1066, 0.1300],
        [0.0946, 0.1073, 0.1321],
        [0.0932, 0.1071, 0.1307],
        [0.0915, 0.1061, 0.1281],
        [0.0940, 0.1077, 0.1321],
        [0.0941, 0.1073, 0.1312],
        [0.094

tensor([[ 0.1136,  0.1215,  0.1523, -0.0492, -0.0034,  0.0477,  0.0099,  0.0339,
          0.0022,  0.0355, -0.0540,  0.0225]])
sigma:  tensor([[0.1136, 0.1215, 0.1523]])
torch.Size([1, 3, 3])
tensor([[ 0.1106,  0.1195,  0.1485,  ...,  0.0348, -0.0532,  0.0223],
        [ 0.1134,  0.1215,  0.1522,  ...,  0.0356, -0.0544,  0.0223],
        [ 0.1119,  0.1205,  0.1497,  ...,  0.0350, -0.0535,  0.0225],
        ...,
        [ 0.1099,  0.1194,  0.1483,  ...,  0.0353, -0.0535,  0.0232],
        [ 0.1138,  0.1211,  0.1528,  ...,  0.0356, -0.0547,  0.0224],
        [ 0.1099,  0.1194,  0.1483,  ...,  0.0353, -0.0535,  0.0232]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.1106, 0.1195, 0.1485],
        [0.1134, 0.1215, 0.1522],
        [0.1119, 0.1205, 0.1497],
        [0.1115, 0.1200, 0.1497],
        [0.1136, 0.1216, 0.1531],
        [0.1138, 0.1216, 0.1534],
        [0.1119, 0.1203, 0.1503],
        [0.1144, 0.1217, 0.1540],
        [0.1147, 0.1222, 0.1539],
        [0.1142, 0.1215, 0

tensor([[ 0.1106,  0.1195,  0.1485,  ...,  0.0348, -0.0532,  0.0223],
        [ 0.1134,  0.1215,  0.1522,  ...,  0.0356, -0.0544,  0.0223],
        [ 0.1119,  0.1205,  0.1497,  ...,  0.0350, -0.0535,  0.0225],
        ...,
        [ 0.1099,  0.1194,  0.1483,  ...,  0.0353, -0.0535,  0.0232],
        [ 0.1138,  0.1211,  0.1528,  ...,  0.0356, -0.0547,  0.0224],
        [ 0.1099,  0.1194,  0.1483,  ...,  0.0353, -0.0535,  0.0232]])
sigma:  tensor([[0.1106, 0.1195, 0.1485],
        [0.1134, 0.1215, 0.1522],
        [0.1119, 0.1205, 0.1497],
        [0.1115, 0.1200, 0.1497],
        [0.1136, 0.1216, 0.1531],
        [0.1138, 0.1216, 0.1534],
        [0.1119, 0.1203, 0.1503],
        [0.1144, 0.1217, 0.1540],
        [0.1147, 0.1222, 0.1539],
        [0.1142, 0.1215, 0.1538],
        [0.1141, 0.1219, 0.1539],
        [0.1142, 0.1215, 0.1538],
        [0.1141, 0.1218, 0.1538],
        [0.1138, 0.1216, 0.1534],
        [0.1148, 0.1220, 0.1544],
        [0.1112, 0.1201, 0.1493],
        [0.111

tensor([[ 0.1394,  0.1379,  0.1806, -0.0504, -0.0031,  0.0505,  0.0089,  0.0430,
          0.0029,  0.0385, -0.0591,  0.0232]])
sigma:  tensor([[0.1394, 0.1379, 0.1806]])
torch.Size([1, 3, 3])
tensor([[ 0.1345,  0.1353,  0.1744,  ...,  0.0379, -0.0578,  0.0237],
        [ 0.1342,  0.1354,  0.1737,  ...,  0.0377, -0.0574,  0.0233],
        [ 0.1383,  0.1373,  0.1794,  ...,  0.0386, -0.0591,  0.0234],
        ...,
        [ 0.1363,  0.1363,  0.1757,  ...,  0.0381, -0.0579,  0.0233],
        [ 0.1366,  0.1357,  0.1766,  ...,  0.0381, -0.0584,  0.0232],
        [ 0.1363,  0.1356,  0.1763,  ...,  0.0381, -0.0581,  0.0232]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.1345, 0.1353, 0.1744],
        [0.1342, 0.1354, 0.1737],
        [0.1383, 0.1373, 0.1794],
        [0.1345, 0.1353, 0.1744],
        [0.1390, 0.1377, 0.1802],
        [0.1372, 0.1363, 0.1779],
        [0.1368, 0.1359, 0.1771],
        [0.1366, 0.1366, 0.1773],
        [0.1335, 0.1340, 0.1729],
        [0.1346, 0.1346, 0

tensor([[ 0.1345,  0.1353,  0.1744,  ...,  0.0379, -0.0578,  0.0237],
        [ 0.1342,  0.1354,  0.1737,  ...,  0.0377, -0.0574,  0.0233],
        [ 0.1383,  0.1373,  0.1794,  ...,  0.0386, -0.0591,  0.0234],
        ...,
        [ 0.1363,  0.1363,  0.1757,  ...,  0.0381, -0.0579,  0.0233],
        [ 0.1366,  0.1357,  0.1766,  ...,  0.0381, -0.0584,  0.0232],
        [ 0.1363,  0.1356,  0.1763,  ...,  0.0381, -0.0581,  0.0232]])
sigma:  tensor([[0.1345, 0.1353, 0.1744],
        [0.1342, 0.1354, 0.1737],
        [0.1383, 0.1373, 0.1794],
        [0.1345, 0.1353, 0.1744],
        [0.1390, 0.1377, 0.1802],
        [0.1372, 0.1363, 0.1779],
        [0.1368, 0.1359, 0.1771],
        [0.1366, 0.1366, 0.1773],
        [0.1335, 0.1340, 0.1729],
        [0.1346, 0.1346, 0.1734],
        [0.1358, 0.1358, 0.1759],
        [0.1362, 0.1358, 0.1765],
        [0.1387, 0.1374, 0.1799],
        [0.1380, 0.1371, 0.1791],
        [0.1347, 0.1351, 0.1748],
        [0.1388, 0.1374, 0.1799],
        [0.138

tensor([[ 0.1654,  0.1561,  0.2068, -0.0529, -0.0044,  0.0534,  0.0070,  0.0504,
          0.0059,  0.0411, -0.0619,  0.0235]])
sigma:  tensor([[0.1654, 0.1561, 0.2068]])
torch.Size([1, 3, 3])
tensor([[ 0.1674,  0.1572,  0.2099,  ...,  0.0418, -0.0636,  0.0239],
        [ 0.1689,  0.1581,  0.2114,  ...,  0.0420, -0.0636,  0.0238],
        [ 0.1649,  0.1558,  0.2061,  ...,  0.0412, -0.0624,  0.0241],
        ...,
        [ 0.1632,  0.1543,  0.2042,  ...,  0.0411, -0.0619,  0.0236],
        [ 0.1661,  0.1566,  0.2078,  ...,  0.0415, -0.0630,  0.0240],
        [ 0.1619,  0.1539,  0.2028,  ...,  0.0408, -0.0617,  0.0238]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.1674, 0.1572, 0.2099],
        [0.1689, 0.1581, 0.2114],
        [0.1649, 0.1558, 0.2061],
        [0.1655, 0.1555, 0.2074],
        [0.1669, 0.1567, 0.2084],
        [0.1633, 0.1547, 0.2040],
        [0.1611, 0.1526, 0.2014],
        [0.1687, 0.1578, 0.2108],
        [0.1650, 0.1555, 0.2059],
        [0.1655, 0.1561, 0

tensor([[ 0.1674,  0.1572,  0.2099,  ...,  0.0418, -0.0636,  0.0239],
        [ 0.1689,  0.1581,  0.2114,  ...,  0.0420, -0.0636,  0.0238],
        [ 0.1649,  0.1558,  0.2061,  ...,  0.0412, -0.0624,  0.0241],
        ...,
        [ 0.1632,  0.1543,  0.2042,  ...,  0.0411, -0.0619,  0.0236],
        [ 0.1661,  0.1566,  0.2078,  ...,  0.0415, -0.0630,  0.0240],
        [ 0.1619,  0.1539,  0.2028,  ...,  0.0408, -0.0617,  0.0238]])
sigma:  tensor([[0.1674, 0.1572, 0.2099],
        [0.1689, 0.1581, 0.2114],
        [0.1649, 0.1558, 0.2061],
        [0.1655, 0.1555, 0.2074],
        [0.1669, 0.1567, 0.2084],
        [0.1633, 0.1547, 0.2040],
        [0.1611, 0.1526, 0.2014],
        [0.1687, 0.1578, 0.2108],
        [0.1650, 0.1555, 0.2059],
        [0.1655, 0.1561, 0.2073],
        [0.1675, 0.1567, 0.2092],
        [0.1678, 0.1575, 0.2103],
        [0.1687, 0.1578, 0.2110],
        [0.1687, 0.1578, 0.2113],
        [0.1625, 0.1543, 0.2037],
        [0.1673, 0.1571, 0.2097],
        [0.166

tensor([[ 0.2073,  0.1853,  0.2511, -0.0555, -0.0064,  0.0619,  0.0020,  0.0645,
          0.0086,  0.0470, -0.0692,  0.0258]])
sigma:  tensor([[0.2073, 0.1853, 0.2511]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.09    |
| time/              |          |
|    episodes        | 112      |
|    fps             | 3        |
|    time_elapsed    | 28       |
|    total_timesteps | 112      |
| train/             |          |
|    actor_loss      | 0.798    |
|    critic_loss     | 17.1     |
|    ent_coef        | 0.0997   |
|    ent_coef_loss   | -8.46    |
|    learning_rate   | 0.0003   |
|    n_updates       | 11       |
---------------------------------
tensor([[ 0.2000,  0.1797,  0.2422,  ...,  0.0458, -0.0676,  0.0253],
        [ 0.1965,  0.1786,  0.2390,  ...,  0.0450, -0.0672,  0.0253],
        [ 0.1990,  0.1802,  0.2407,  ...,  0.0455, -0.0671,  0.0254],
        ...,
      

tensor([[ 0.2000,  0.1797,  0.2422,  ...,  0.0458, -0.0676,  0.0253],
        [ 0.1965,  0.1786,  0.2390,  ...,  0.0450, -0.0672,  0.0253],
        [ 0.1990,  0.1802,  0.2407,  ...,  0.0455, -0.0671,  0.0254],
        ...,
        [ 0.2015,  0.1813,  0.2448,  ...,  0.0462, -0.0684,  0.0257],
        [ 0.2039,  0.1827,  0.2473,  ...,  0.0466, -0.0689,  0.0258],
        [ 0.1988,  0.1796,  0.2412,  ...,  0.0453, -0.0673,  0.0253]])
sigma:  tensor([[0.2000, 0.1797, 0.2422],
        [0.1965, 0.1786, 0.2390],
        [0.1990, 0.1802, 0.2407],
        [0.1988, 0.1791, 0.2402],
        [0.2043, 0.1829, 0.2477],
        [0.1956, 0.1785, 0.2374],
        [0.1967, 0.1782, 0.2387],
        [0.1958, 0.1781, 0.2379],
        [0.2009, 0.1805, 0.2435],
        [0.2042, 0.1829, 0.2477],
        [0.1940, 0.1763, 0.2356],
        [0.1940, 0.1762, 0.2350],
        [0.1920, 0.1748, 0.2327],
        [0.1928, 0.1759, 0.2343],
        [0.1976, 0.1790, 0.2402],
        [0.1967, 0.1782, 0.2387],
        [0.195

tensor([[ 0.2317,  0.2040,  0.2737, -0.0598, -0.0097,  0.0633, -0.0003,  0.0697,
          0.0129,  0.0498, -0.0711,  0.0275]])
sigma:  tensor([[0.2317, 0.2040, 0.2737]])
torch.Size([1, 3, 3])
tensor([[ 0.2379,  0.2093,  0.2814,  ...,  0.0509, -0.0725,  0.0282],
        [ 0.2386,  0.2094,  0.2822,  ...,  0.0509, -0.0729,  0.0280],
        [ 0.2408,  0.2100,  0.2838,  ...,  0.0514, -0.0730,  0.0279],
        ...,
        [ 0.2425,  0.2120,  0.2866,  ...,  0.0519, -0.0736,  0.0283],
        [ 0.2427,  0.2124,  0.2870,  ...,  0.0519, -0.0736,  0.0284],
        [ 0.2458,  0.2137,  0.2905,  ...,  0.0523, -0.0741,  0.0283]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.2379, 0.2093, 0.2814],
        [0.2386, 0.2094, 0.2822],
        [0.2408, 0.2100, 0.2838],
        [0.2491, 0.2160, 0.2947],
        [0.2427, 0.2124, 0.2870],
        [0.2459, 0.2140, 0.2910],
        [0.2459, 0.2135, 0.2908],
        [0.2493, 0.2161, 0.2949],
        [0.2448, 0.2130, 0.2896],
        [0.2483, 0.2155, 0

tensor([[ 0.2379,  0.2093,  0.2814,  ...,  0.0509, -0.0725,  0.0282],
        [ 0.2386,  0.2094,  0.2822,  ...,  0.0509, -0.0729,  0.0280],
        [ 0.2408,  0.2100,  0.2838,  ...,  0.0514, -0.0730,  0.0279],
        ...,
        [ 0.2425,  0.2120,  0.2866,  ...,  0.0519, -0.0736,  0.0283],
        [ 0.2427,  0.2124,  0.2870,  ...,  0.0519, -0.0736,  0.0284],
        [ 0.2458,  0.2137,  0.2905,  ...,  0.0523, -0.0741,  0.0283]])
sigma:  tensor([[0.2379, 0.2093, 0.2814],
        [0.2386, 0.2094, 0.2822],
        [0.2408, 0.2100, 0.2838],
        [0.2491, 0.2160, 0.2947],
        [0.2427, 0.2124, 0.2870],
        [0.2459, 0.2140, 0.2910],
        [0.2459, 0.2135, 0.2908],
        [0.2493, 0.2161, 0.2949],
        [0.2448, 0.2130, 0.2896],
        [0.2483, 0.2155, 0.2939],
        [0.2369, 0.2085, 0.2797],
        [0.2314, 0.2042, 0.2735],
        [0.2464, 0.2142, 0.2916],
        [0.2427, 0.2112, 0.2866],
        [0.2473, 0.2148, 0.2926],
        [0.2494, 0.2163, 0.2953],
        [0.247

tensor([[ 0.3009,  0.2579,  0.3517, -0.0645, -0.0175,  0.0783, -0.0086,  0.0903,
          0.0183,  0.0611, -0.0840,  0.0325]])
sigma:  tensor([[0.3009, 0.2579, 0.3517]])
torch.Size([1, 3, 3])
tensor([[ 0.2924,  0.2521,  0.3412,  ...,  0.0593, -0.0817,  0.0319],
        [ 0.3025,  0.2585,  0.3535,  ...,  0.0614, -0.0844,  0.0325],
        [ 0.2963,  0.2541,  0.3461,  ...,  0.0603, -0.0829,  0.0320],
        ...,
        [ 0.2931,  0.2519,  0.3422,  ...,  0.0594, -0.0821,  0.0315],
        [ 0.2858,  0.2462,  0.3324,  ...,  0.0581, -0.0798,  0.0309],
        [ 0.2941,  0.2528,  0.3433,  ...,  0.0597, -0.0821,  0.0318]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.2924, 0.2521, 0.3412],
        [0.3025, 0.2585, 0.3535],
        [0.2963, 0.2541, 0.3461],
        [0.2858, 0.2462, 0.3324],
        [0.2914, 0.2514, 0.3396],
        [0.2941, 0.2528, 0.3433],
        [0.3011, 0.2575, 0.3518],
        [0.2919, 0.2512, 0.3406],
        [0.3006, 0.2574, 0.3513],
        [0.2963, 0.2541, 0

tensor([[ 0.2924,  0.2521,  0.3412,  ...,  0.0593, -0.0817,  0.0319],
        [ 0.3025,  0.2585,  0.3535,  ...,  0.0614, -0.0844,  0.0325],
        [ 0.2963,  0.2541,  0.3461,  ...,  0.0603, -0.0829,  0.0320],
        ...,
        [ 0.2931,  0.2519,  0.3422,  ...,  0.0594, -0.0821,  0.0315],
        [ 0.2858,  0.2462,  0.3324,  ...,  0.0581, -0.0798,  0.0309],
        [ 0.2941,  0.2528,  0.3433,  ...,  0.0597, -0.0821,  0.0318]])
sigma:  tensor([[0.2924, 0.2521, 0.3412],
        [0.3025, 0.2585, 0.3535],
        [0.2963, 0.2541, 0.3461],
        [0.2858, 0.2462, 0.3324],
        [0.2914, 0.2514, 0.3396],
        [0.2941, 0.2528, 0.3433],
        [0.3011, 0.2575, 0.3518],
        [0.2919, 0.2512, 0.3406],
        [0.3006, 0.2574, 0.3513],
        [0.2963, 0.2541, 0.3461],
        [0.2903, 0.2502, 0.3387],
        [0.2820, 0.2441, 0.3289],
        [0.2859, 0.2482, 0.3337],
        [0.2891, 0.2494, 0.3376],
        [0.2952, 0.2536, 0.3448],
        [0.2964, 0.2545, 0.3461],
        [0.299

tensor([[ 0.3538,  0.3019,  0.4081, -0.0707, -0.0248,  0.0856, -0.0127,  0.1030,
          0.0240,  0.0667, -0.0923,  0.0358]])
sigma:  tensor([[0.3538, 0.3019, 0.4081]])
torch.Size([1, 3, 3])
tensor([[ 0.3625,  0.3090,  0.4183,  ...,  0.0681, -0.0943,  0.0366],
        [ 0.3535,  0.3021,  0.4077,  ...,  0.0665, -0.0920,  0.0358],
        [ 0.3466,  0.2962,  0.3986,  ...,  0.0655, -0.0901,  0.0352],
        ...,
        [ 0.3506,  0.2996,  0.4042,  ...,  0.0662, -0.0914,  0.0356],
        [ 0.3598,  0.3061,  0.4149,  ...,  0.0677, -0.0939,  0.0361],
        [ 0.3604,  0.3075,  0.4160,  ...,  0.0678, -0.0939,  0.0365]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.3625, 0.3090, 0.4183],
        [0.3535, 0.3021, 0.4077],
        [0.3466, 0.2962, 0.3986],
        [0.3579, 0.3052, 0.4130],
        [0.3655, 0.3112, 0.4217],
        [0.3537, 0.3020, 0.4077],
        [0.3663, 0.3115, 0.4230],
        [0.3576, 0.3046, 0.4123],
        [0.3576, 0.3046, 0.4123],
        [0.3544, 0.3026, 0

tensor([[ 0.3625,  0.3090,  0.4183,  ...,  0.0681, -0.0943,  0.0366],
        [ 0.3535,  0.3021,  0.4077,  ...,  0.0665, -0.0920,  0.0358],
        [ 0.3466,  0.2962,  0.3986,  ...,  0.0655, -0.0901,  0.0352],
        ...,
        [ 0.3506,  0.2996,  0.4042,  ...,  0.0662, -0.0914,  0.0356],
        [ 0.3598,  0.3061,  0.4149,  ...,  0.0677, -0.0939,  0.0361],
        [ 0.3604,  0.3075,  0.4160,  ...,  0.0678, -0.0939,  0.0365]])
sigma:  tensor([[0.3625, 0.3090, 0.4183],
        [0.3535, 0.3021, 0.4077],
        [0.3466, 0.2962, 0.3986],
        [0.3579, 0.3052, 0.4130],
        [0.3655, 0.3112, 0.4217],
        [0.3537, 0.3020, 0.4077],
        [0.3663, 0.3115, 0.4230],
        [0.3576, 0.3046, 0.4123],
        [0.3576, 0.3046, 0.4123],
        [0.3544, 0.3026, 0.4086],
        [0.3618, 0.3083, 0.4177],
        [0.3578, 0.3044, 0.4124],
        [0.3379, 0.2902, 0.3894],
        [0.3426, 0.2938, 0.3951],
        [0.3587, 0.3058, 0.4138],
        [0.3646, 0.3100, 0.4208],
        [0.341

tensor([[ 0.4426,  0.3763,  0.5053, -0.0799, -0.0363,  0.0998, -0.0199,  0.1261,
          0.0335,  0.0773, -0.1082,  0.0419]])
sigma:  tensor([[0.4426, 0.3763, 0.5053]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.09    |
| time/              |          |
|    episodes        | 116      |
|    fps             | 3        |
|    time_elapsed    | 38       |
|    total_timesteps | 116      |
| train/             |          |
|    actor_loss      | 2.29     |
|    critic_loss     | 11.2     |
|    ent_coef        | 0.0996   |
|    ent_coef_loss   | -9.23    |
|    learning_rate   | 0.0003   |
|    n_updates       | 15       |
---------------------------------
tensor([[ 0.4481,  0.3802,  0.5117,  ...,  0.0782, -0.1096,  0.0422],
        [ 0.4434,  0.3762,  0.5059,  ...,  0.0774, -0.1085,  0.0417],
        [ 0.4258,  0.3635,  0.4859,  ...,  0.0745, -0.1042,  0.0404],
        ...,
      

tensor([[ 0.4481,  0.3802,  0.5117,  ...,  0.0782, -0.1096,  0.0422],
        [ 0.4434,  0.3762,  0.5059,  ...,  0.0774, -0.1085,  0.0417],
        [ 0.4258,  0.3635,  0.4859,  ...,  0.0745, -0.1042,  0.0404],
        ...,
        [ 0.4168,  0.3565,  0.4760,  ...,  0.0736, -0.1022,  0.0399],
        [ 0.4374,  0.3716,  0.4990,  ...,  0.0766, -0.1069,  0.0415],
        [ 0.4461,  0.3785,  0.5092,  ...,  0.0779, -0.1091,  0.0421]])
sigma:  tensor([[0.4481, 0.3802, 0.5117],
        [0.4434, 0.3762, 0.5059],
        [0.4258, 0.3635, 0.4859],
        [0.4436, 0.3771, 0.5063],
        [0.4354, 0.3706, 0.4970],
        [0.4370, 0.3714, 0.4984],
        [0.4162, 0.3571, 0.4756],
        [0.4222, 0.3609, 0.4817],
        [0.4348, 0.3700, 0.4962],
        [0.4460, 0.3788, 0.5090],
        [0.4218, 0.3617, 0.4819],
        [0.4259, 0.3625, 0.4854],
        [0.4351, 0.3708, 0.4966],
        [0.4370, 0.3714, 0.4988],
        [0.4481, 0.3802, 0.5117],
        [0.4481, 0.3799, 0.5115],
        [0.449

tensor([[ 0.5156,  0.4408,  0.5818, -0.0898, -0.0468,  0.1084, -0.0247,  0.1422,
          0.0451,  0.0835, -0.1202,  0.0445]])
sigma:  tensor([[0.5156, 0.4408, 0.5818]])
torch.Size([1, 3, 3])
tensor([[ 0.5220,  0.4464,  0.5893,  ...,  0.0844, -0.1219,  0.0448],
        [ 0.5356,  0.4569,  0.6047,  ...,  0.0860, -0.1246,  0.0448],
        [ 0.5561,  0.4720,  0.6273,  ...,  0.0890, -0.1297,  0.0461],
        ...,
        [ 0.5317,  0.4539,  0.6000,  ...,  0.0857, -0.1239,  0.0450],
        [ 0.5026,  0.4302,  0.5671,  ...,  0.0819, -0.1173,  0.0441],
        [ 0.5297,  0.4521,  0.5973,  ...,  0.0855, -0.1234,  0.0448]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.5220, 0.4464, 0.5893],
        [0.5356, 0.4569, 0.6047],
        [0.5561, 0.4720, 0.6273],
        [0.5296, 0.4519, 0.5975],
        [0.5353, 0.4565, 0.6040],
        [0.5407, 0.4607, 0.6101],
        [0.5108, 0.4369, 0.5766],
        [0.5269, 0.4497, 0.5946],
        [0.5296, 0.4519, 0.5975],
        [0.5543, 0.4704, 0

tensor([[ 0.5220,  0.4464,  0.5893,  ...,  0.0844, -0.1219,  0.0448],
        [ 0.5356,  0.4569,  0.6047,  ...,  0.0860, -0.1246,  0.0448],
        [ 0.5561,  0.4720,  0.6273,  ...,  0.0890, -0.1297,  0.0461],
        ...,
        [ 0.5317,  0.4539,  0.6000,  ...,  0.0857, -0.1239,  0.0450],
        [ 0.5026,  0.4302,  0.5671,  ...,  0.0819, -0.1173,  0.0441],
        [ 0.5297,  0.4521,  0.5973,  ...,  0.0855, -0.1234,  0.0448]])
sigma:  tensor([[0.5220, 0.4464, 0.5893],
        [0.5356, 0.4569, 0.6047],
        [0.5561, 0.4720, 0.6273],
        [0.5296, 0.4519, 0.5975],
        [0.5353, 0.4565, 0.6040],
        [0.5407, 0.4607, 0.6101],
        [0.5108, 0.4369, 0.5766],
        [0.5269, 0.4497, 0.5946],
        [0.5296, 0.4519, 0.5975],
        [0.5543, 0.4704, 0.6251],
        [0.5449, 0.4627, 0.6142],
        [0.5434, 0.4620, 0.6129],
        [0.5361, 0.4573, 0.6049],
        [0.5449, 0.4627, 0.6142],
        [0.5515, 0.4685, 0.6221],
        [0.5287, 0.4506, 0.5966],
        [0.540

tensor([[ 0.6558,  0.5606,  0.7310, -0.1024, -0.0638,  0.1281, -0.0360,  0.1774,
          0.0632,  0.0958, -0.1482,  0.0479]])
sigma:  tensor([[0.6558, 0.5606, 0.7310]])
torch.Size([1, 3, 3])
tensor([[ 0.6626,  0.5672,  0.7398,  ...,  0.0972, -0.1501,  0.0484],
        [ 0.6416,  0.5508,  0.7167,  ...,  0.0947, -0.1455,  0.0477],
        [ 0.6439,  0.5527,  0.7187,  ...,  0.0945, -0.1456,  0.0473],
        ...,
        [ 0.6379,  0.5464,  0.7109,  ...,  0.0936, -0.1441,  0.0472],
        [ 0.6901,  0.5887,  0.7701,  ...,  0.1004, -0.1561,  0.0491],
        [ 0.6443,  0.5526,  0.7189,  ...,  0.0946, -0.1458,  0.0473]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.6626, 0.5672, 0.7398],
        [0.6416, 0.5508, 0.7167],
        [0.6439, 0.5527, 0.7187],
        [0.6901, 0.5887, 0.7701],
        [0.6860, 0.5854, 0.7655],
        [0.6443, 0.5526, 0.7189],
        [0.6294, 0.5404, 0.7028],
        [0.6406, 0.5504, 0.7155],
        [0.6823, 0.5826, 0.7614],
        [0.6908, 0.5890, 0

tensor([[ 0.6626,  0.5672,  0.7398,  ...,  0.0972, -0.1501,  0.0484],
        [ 0.6416,  0.5508,  0.7167,  ...,  0.0947, -0.1455,  0.0477],
        [ 0.6439,  0.5527,  0.7187,  ...,  0.0945, -0.1456,  0.0473],
        ...,
        [ 0.6379,  0.5464,  0.7109,  ...,  0.0936, -0.1441,  0.0472],
        [ 0.6901,  0.5887,  0.7701,  ...,  0.1004, -0.1561,  0.0491],
        [ 0.6443,  0.5526,  0.7189,  ...,  0.0946, -0.1458,  0.0473]])
sigma:  tensor([[0.6626, 0.5672, 0.7398],
        [0.6416, 0.5508, 0.7167],
        [0.6439, 0.5527, 0.7187],
        [0.6901, 0.5887, 0.7701],
        [0.6860, 0.5854, 0.7655],
        [0.6443, 0.5526, 0.7189],
        [0.6294, 0.5404, 0.7028],
        [0.6406, 0.5504, 0.7155],
        [0.6823, 0.5826, 0.7614],
        [0.6908, 0.5890, 0.7706],
        [0.6817, 0.5821, 0.7607],
        [0.6501, 0.5563, 0.7246],
        [0.6847, 0.5847, 0.7643],
        [0.6537, 0.5604, 0.7293],
        [0.6786, 0.5797, 0.7574],
        [0.6727, 0.5747, 0.7506],
        [0.689

tensor([[ 0.8122,  0.7007,  0.8953, -0.1224, -0.0807,  0.1470, -0.0522,  0.2102,
          0.0823,  0.1062, -0.1782,  0.0468]])
sigma:  tensor([[0.8122, 0.7007, 0.8953]])
torch.Size([1, 3, 3])
tensor([[ 0.8598,  0.7386,  0.9476,  ...,  0.1113, -0.1884,  0.0480],
        [ 0.7876,  0.6792,  0.8674,  ...,  0.1030, -0.1728,  0.0462],
        [ 0.8548,  0.7348,  0.9423,  ...,  0.1108, -0.1875,  0.0480],
        ...,
        [ 0.7925,  0.6846,  0.8733,  ...,  0.1033, -0.1734,  0.0464],
        [ 0.8050,  0.6943,  0.8876,  ...,  0.1048, -0.1766,  0.0468],
        [ 0.8167,  0.7039,  0.9004,  ...,  0.1062, -0.1792,  0.0471]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[0.8598, 0.7386, 0.9476],
        [0.7876, 0.6792, 0.8674],
        [0.8548, 0.7348, 0.9423],
        [0.7810, 0.6750, 0.8607],
        [0.7988, 0.6904, 0.8811],
        [0.8269, 0.7119, 0.9110],
        [0.8379, 0.7210, 0.9238],
        [0.8143, 0.7026, 0.8976],
        [0.8534, 0.7341, 0.9410],
        [0.8499, 0.7311, 0

tensor([[ 0.8598,  0.7386,  0.9476,  ...,  0.1113, -0.1884,  0.0480],
        [ 0.7876,  0.6792,  0.8674,  ...,  0.1030, -0.1728,  0.0462],
        [ 0.8548,  0.7348,  0.9423,  ...,  0.1108, -0.1875,  0.0480],
        ...,
        [ 0.7925,  0.6846,  0.8733,  ...,  0.1033, -0.1734,  0.0464],
        [ 0.8050,  0.6943,  0.8876,  ...,  0.1048, -0.1766,  0.0468],
        [ 0.8167,  0.7039,  0.9004,  ...,  0.1062, -0.1792,  0.0471]])
sigma:  tensor([[0.8598, 0.7386, 0.9476],
        [0.7876, 0.6792, 0.8674],
        [0.8548, 0.7348, 0.9423],
        [0.7810, 0.6750, 0.8607],
        [0.7988, 0.6904, 0.8811],
        [0.8269, 0.7119, 0.9110],
        [0.8379, 0.7210, 0.9238],
        [0.8143, 0.7026, 0.8976],
        [0.8534, 0.7341, 0.9410],
        [0.8499, 0.7311, 0.9368],
        [0.8235, 0.7096, 0.9081],
        [0.8056, 0.6953, 0.8886],
        [0.8354, 0.7196, 0.9211],
        [0.7792, 0.6734, 0.8592],
        [0.8499, 0.7311, 0.9368],
        [0.8278, 0.7131, 0.9126],
        [0.805

tensor([[ 1.0665,  0.9248,  1.1626, -0.1536, -0.1104,  0.1730, -0.0812,  0.2625,
          0.1107,  0.1300, -0.2289,  0.0472]])
sigma:  tensor([[1.0665, 0.9248, 1.1626]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.13    |
| time/              |          |
|    episodes        | 120      |
|    fps             | 2        |
|    time_elapsed    | 48       |
|    total_timesteps | 120      |
| train/             |          |
|    actor_loss      | 6.54     |
|    critic_loss     | 2.3      |
|    ent_coef        | 0.0994   |
|    ent_coef_loss   | -9.21    |
|    learning_rate   | 0.0003   |
|    n_updates       | 19       |
---------------------------------
tensor([[ 1.0526,  0.9138,  1.1477,  ...,  0.1282, -0.2260,  0.0465],
        [ 1.0528,  0.9129,  1.1471,  ...,  0.1282, -0.2258,  0.0470],
        [ 1.0274,  0.8923,  1.1200,  ...,  0.1258, -0.2210,  0.0468],
        ...,
      

tensor([[ 1.0526,  0.9138,  1.1477,  ...,  0.1282, -0.2260,  0.0465],
        [ 1.0528,  0.9129,  1.1471,  ...,  0.1282, -0.2258,  0.0470],
        [ 1.0274,  0.8923,  1.1200,  ...,  0.1258, -0.2210,  0.0468],
        ...,
        [ 1.0240,  0.8905,  1.1170,  ...,  0.1248, -0.2199,  0.0461],
        [ 1.0383,  0.9021,  1.1321,  ...,  0.1269, -0.2229,  0.0470],
        [ 1.0095,  0.8777,  1.1006,  ...,  0.1240, -0.2171,  0.0466]])
sigma:  tensor([[1.0526, 0.9138, 1.1477],
        [1.0528, 0.9129, 1.1471],
        [1.0274, 0.8923, 1.1200],
        [1.0076, 0.8763, 1.0983],
        [1.0665, 0.9248, 1.1626],
        [1.0439, 0.9063, 1.1381],
        [1.0611, 0.9206, 1.1568],
        [0.9926, 0.8641, 1.0821],
        [1.0675, 0.9257, 1.1638],
        [1.0095, 0.8777, 1.1006],
        [1.0108, 0.8797, 1.1023],
        [1.0664, 0.9246, 1.1624],
        [1.0584, 0.9181, 1.1538],
        [1.0614, 0.9206, 1.1571],
        [1.0379, 0.9008, 1.1312],
        [0.9517, 0.8293, 1.0373],
        [1.067

tensor([[ 1.3123,  1.1477,  1.4158, -0.1904, -0.1318,  0.2022, -0.1005,  0.3192,
          0.1393,  0.1419, -0.2802,  0.0384]])
sigma:  tensor([[1.3123, 1.1477, 1.4158]])
torch.Size([1, 3, 3])
tensor([[ 1.3356,  1.1668,  1.4405,  ...,  0.1437, -0.2845,  0.0381],
        [ 1.3328,  1.1650,  1.4382,  ...,  0.1438, -0.2844,  0.0382],
        [ 1.2250,  1.0727,  1.3199,  ...,  0.1331, -0.2615,  0.0382],
        ...,
        [ 1.2453,  1.0896,  1.3422,  ...,  0.1351, -0.2659,  0.0385],
        [ 1.2250,  1.0727,  1.3199,  ...,  0.1331, -0.2615,  0.0382],
        [ 1.3377,  1.1686,  1.4429,  ...,  0.1441, -0.2853,  0.0384]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[1.3356, 1.1668, 1.4405],
        [1.3328, 1.1650, 1.4382],
        [1.2250, 1.0727, 1.3199],
        [1.2858, 1.1242, 1.3862],
        [1.2512, 1.0959, 1.3495],
        [1.2538, 1.0979, 1.3523],
        [1.3199, 1.1539, 1.4243],
        [1.2572, 1.1007, 1.3558],
        [1.2704, 1.1123, 1.3701],
        [1.3125, 1.1479, 1

tensor([[ 1.3356,  1.1668,  1.4405,  ...,  0.1437, -0.2845,  0.0381],
        [ 1.3328,  1.1650,  1.4382,  ...,  0.1438, -0.2844,  0.0382],
        [ 1.2250,  1.0727,  1.3199,  ...,  0.1331, -0.2615,  0.0382],
        ...,
        [ 1.2453,  1.0896,  1.3422,  ...,  0.1351, -0.2659,  0.0385],
        [ 1.2250,  1.0727,  1.3199,  ...,  0.1331, -0.2615,  0.0382],
        [ 1.3377,  1.1686,  1.4429,  ...,  0.1441, -0.2853,  0.0384]])
sigma:  tensor([[1.3356, 1.1668, 1.4405],
        [1.3328, 1.1650, 1.4382],
        [1.2250, 1.0727, 1.3199],
        [1.2858, 1.1242, 1.3862],
        [1.2512, 1.0959, 1.3495],
        [1.2538, 1.0979, 1.3523],
        [1.3199, 1.1539, 1.4243],
        [1.2572, 1.1007, 1.3558],
        [1.2704, 1.1123, 1.3701],
        [1.3125, 1.1479, 1.4158],
        [1.3608, 1.1885, 1.4683],
        [1.2788, 1.1189, 1.3792],
        [1.3437, 1.1737, 1.4495],
        [1.2921, 1.1297, 1.3931],
        [1.3199, 1.1539, 1.4243],
        [1.2842, 1.1232, 1.3848],
        [1.343

tensor([[ 1.6643,  1.4654,  1.7737, -0.2213, -0.1606,  0.2479, -0.1193,  0.4020,
          0.1994,  0.1778, -0.3369,  0.0444]])
sigma:  tensor([[1.6643, 1.4654, 1.7737]])
torch.Size([1, 3, 3])
tensor([[ 1.5704,  1.3851,  1.6735,  ...,  0.1684, -0.3188,  0.0444],
        [ 1.6019,  1.4118,  1.7068,  ...,  0.1716, -0.3252,  0.0443],
        [ 1.5704,  1.3851,  1.6735,  ...,  0.1684, -0.3188,  0.0444],
        ...,
        [ 1.6866,  1.4847,  1.7974,  ...,  0.1801, -0.3414,  0.0450],
        [ 1.6675,  1.4683,  1.7769,  ...,  0.1782, -0.3375,  0.0450],
        [ 1.6838,  1.4822,  1.7944,  ...,  0.1799, -0.3409,  0.0450]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[1.5704, 1.3851, 1.6735],
        [1.6019, 1.4118, 1.7068],
        [1.5704, 1.3851, 1.6735],
        [1.6084, 1.4169, 1.7133],
        [1.6879, 1.4855, 1.7985],
        [1.6748, 1.4745, 1.7847],
        [1.5062, 1.3298, 1.6047],
        [1.6516, 1.4547, 1.7598],
        [1.5798, 1.3936, 1.6833],
        [1.6555, 1.4582, 1

tensor([[ 1.5704,  1.3851,  1.6735,  ...,  0.1684, -0.3188,  0.0444],
        [ 1.6019,  1.4118,  1.7068,  ...,  0.1716, -0.3252,  0.0443],
        [ 1.5704,  1.3851,  1.6735,  ...,  0.1684, -0.3188,  0.0444],
        ...,
        [ 1.6866,  1.4847,  1.7974,  ...,  0.1801, -0.3414,  0.0450],
        [ 1.6675,  1.4683,  1.7769,  ...,  0.1782, -0.3375,  0.0450],
        [ 1.6838,  1.4822,  1.7944,  ...,  0.1799, -0.3409,  0.0450]])
sigma:  tensor([[1.5704, 1.3851, 1.6735],
        [1.6019, 1.4118, 1.7068],
        [1.5704, 1.3851, 1.6735],
        [1.6084, 1.4169, 1.7133],
        [1.6879, 1.4855, 1.7985],
        [1.6748, 1.4745, 1.7847],
        [1.5062, 1.3298, 1.6047],
        [1.6516, 1.4547, 1.7598],
        [1.5798, 1.3936, 1.6833],
        [1.6555, 1.4582, 1.7647],
        [1.6724, 1.4722, 1.7820],
        [1.6565, 1.4586, 1.7650],
        [1.5355, 1.3560, 1.6362],
        [1.6643, 1.4654, 1.7737],
        [1.6353, 1.4411, 1.7427],
        [1.4818, 1.3087, 1.5785],
        [1.630

tensor([[ 2.0541,  1.8263,  2.1638, -0.2555, -0.1918,  0.2969, -0.1395,  0.4923,
          0.2708,  0.2216, -0.3972,  0.0529]])
sigma:  tensor([[2.0541, 1.8263, 2.1638]])
torch.Size([1, 3, 3])
tensor([[ 1.9087,  1.6994,  2.0102,  ...,  0.2068, -0.3699,  0.0519],
        [ 1.9494,  1.7342,  2.0529,  ...,  0.2107, -0.3777,  0.0522],
        [ 2.0451,  1.8187,  2.1545,  ...,  0.2206, -0.3955,  0.0525],
        ...,
        [ 1.9751,  1.7585,  2.0808,  ...,  0.2139, -0.3824,  0.0523],
        [ 1.9262,  1.7152,  2.0288,  ...,  0.2086, -0.3732,  0.0517],
        [ 2.0541,  1.8263,  2.1638,  ...,  0.2216, -0.3972,  0.0529]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[1.9087, 1.6994, 2.0102],
        [1.9494, 1.7342, 2.0529],
        [2.0451, 1.8187, 2.1545],
        [1.8733, 1.6697, 1.9738],
        [2.1049, 1.8710, 2.2177],
        [2.0981, 1.8656, 2.2110],
        [2.0869, 1.8554, 2.1988],
        [1.8690, 1.6651, 1.9685],
        [2.0315, 1.8067, 2.1401],
        [2.0113, 1.7888, 2

tensor([[ 1.9087,  1.6994,  2.0102,  ...,  0.2068, -0.3699,  0.0519],
        [ 1.9494,  1.7342,  2.0529,  ...,  0.2107, -0.3777,  0.0522],
        [ 2.0451,  1.8187,  2.1545,  ...,  0.2206, -0.3955,  0.0525],
        ...,
        [ 1.9751,  1.7585,  2.0808,  ...,  0.2139, -0.3824,  0.0523],
        [ 1.9262,  1.7152,  2.0288,  ...,  0.2086, -0.3732,  0.0517],
        [ 2.0541,  1.8263,  2.1638,  ...,  0.2216, -0.3972,  0.0529]])
sigma:  tensor([[1.9087, 1.6994, 2.0102],
        [1.9494, 1.7342, 2.0529],
        [2.0451, 1.8187, 2.1545],
        [1.8733, 1.6697, 1.9738],
        [2.1049, 1.8710, 2.2177],
        [2.0981, 1.8656, 2.2110],
        [2.0869, 1.8554, 2.1988],
        [1.8690, 1.6651, 1.9685],
        [2.0315, 1.8067, 2.1401],
        [2.0113, 1.7888, 2.1188],
        [2.0698, 1.8397, 2.1801],
        [2.1049, 1.8710, 2.2177],
        [2.1016, 1.8683, 2.2143],
        [1.9641, 1.7482, 2.0687],
        [1.9271, 1.7153, 2.0298],
        [2.0985, 1.8652, 2.2107],
        [2.054

tensor([[ 2.5059,  2.2550,  2.6121, -0.2977, -0.2292,  0.3497, -0.1650,  0.5941,
          0.3541,  0.2767, -0.4643,  0.0644]])
sigma:  tensor([[2.5059, 2.2550, 2.6121]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.16    |
| time/              |          |
|    episodes        | 124      |
|    fps             | 2        |
|    time_elapsed    | 59       |
|    total_timesteps | 124      |
| train/             |          |
|    actor_loss      | 8.13     |
|    critic_loss     | 5.13     |
|    ent_coef        | 0.0993   |
|    ent_coef_loss   | -12.1    |
|    learning_rate   | 0.0003   |
|    n_updates       | 23       |
---------------------------------
tensor([[ 2.6214,  2.3573,  2.7322,  ...,  0.2886, -0.4851,  0.0659],
        [ 2.6257,  2.3608,  2.7363,  ...,  0.2890, -0.4857,  0.0659],
        [ 2.5677,  2.3094,  2.6760,  ...,  0.2827, -0.4752,  0.0654],
        ...,
      

tensor([[ 2.6214,  2.3573,  2.7322,  ...,  0.2886, -0.4851,  0.0659],
        [ 2.6257,  2.3608,  2.7363,  ...,  0.2890, -0.4857,  0.0659],
        [ 2.5677,  2.3094,  2.6760,  ...,  0.2827, -0.4752,  0.0654],
        ...,
        [ 2.5059,  2.2550,  2.6121,  ...,  0.2767, -0.4643,  0.0644],
        [ 2.5155,  2.2632,  2.6215,  ...,  0.2775, -0.4661,  0.0644],
        [ 2.5754,  2.3168,  2.6842,  ...,  0.2837, -0.4767,  0.0651]])
sigma:  tensor([[2.6214, 2.3573, 2.7322],
        [2.6257, 2.3608, 2.7363],
        [2.5677, 2.3094, 2.6760],
        [2.6215, 2.3571, 2.7320],
        [2.4418, 2.1986, 2.5452],
        [2.4702, 2.2226, 2.5743],
        [2.4736, 2.2254, 2.5776],
        [2.5352, 2.2803, 2.6424],
        [2.5695, 2.3110, 2.6779],
        [2.3539, 2.1199, 2.4532],
        [2.4166, 2.1760, 2.5191],
        [2.6166, 2.3526, 2.7270],
        [2.6270, 2.3623, 2.7380],
        [2.3703, 2.1355, 2.4712],
        [2.5162, 2.2630, 2.6218],
        [2.5877, 2.3264, 2.6962],
        [2.487

tensor([[ 3.3217,  3.0170,  3.4255, -0.3829, -0.2977,  0.4380, -0.2010,  0.7857,
          0.4944,  0.3621, -0.5790,  0.0773]])
sigma:  tensor([[3.3217, 3.0170, 3.4255]])
torch.Size([1, 3, 3])
tensor([[ 3.0613,  2.7832,  3.1568,  ...,  0.3342, -0.5351,  0.0744],
        [ 3.1623,  2.8732,  3.2606,  ...,  0.3450, -0.5525,  0.0754],
        [ 3.1502,  2.8620,  3.2480,  ...,  0.3435, -0.5502,  0.0754],
        ...,
        [ 3.0373,  2.7612,  3.1317,  ...,  0.3315, -0.5310,  0.0735],
        [ 3.0239,  2.7479,  3.1173,  ...,  0.3299, -0.5286,  0.0738],
        [ 3.2191,  2.9244,  3.3191,  ...,  0.3509, -0.5619,  0.0763]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[3.0613, 2.7832, 3.1568],
        [3.1623, 2.8732, 3.2606],
        [3.1502, 2.8620, 3.2480],
        [3.0258, 2.7516, 3.1205],
        [2.9699, 2.7008, 3.0629],
        [3.2643, 2.9652, 3.3659],
        [3.1850, 2.8947, 3.2844],
        [3.1290, 2.8431, 3.2260],
        [3.2625, 2.9639, 3.3643],
        [3.1480, 2.8612, 3

tensor([[ 3.0613,  2.7832,  3.1568,  ...,  0.3342, -0.5351,  0.0744],
        [ 3.1623,  2.8732,  3.2606,  ...,  0.3450, -0.5525,  0.0754],
        [ 3.1502,  2.8620,  3.2480,  ...,  0.3435, -0.5502,  0.0754],
        ...,
        [ 3.0373,  2.7612,  3.1317,  ...,  0.3315, -0.5310,  0.0735],
        [ 3.0239,  2.7479,  3.1173,  ...,  0.3299, -0.5286,  0.0738],
        [ 3.2191,  2.9244,  3.3191,  ...,  0.3509, -0.5619,  0.0763]])
sigma:  tensor([[3.0613, 2.7832, 3.1568],
        [3.1623, 2.8732, 3.2606],
        [3.1502, 2.8620, 3.2480],
        [3.0258, 2.7516, 3.1205],
        [2.9699, 2.7008, 3.0629],
        [3.2643, 2.9652, 3.3659],
        [3.1850, 2.8947, 3.2844],
        [3.1290, 2.8431, 3.2260],
        [3.2625, 2.9639, 3.3643],
        [3.1480, 2.8612, 3.2464],
        [3.2025, 2.9103, 3.3024],
        [3.0606, 2.7829, 3.1560],
        [3.1072, 2.8244, 3.2041],
        [3.2726, 2.9729, 3.3746],
        [3.2434, 2.9467, 3.3449],
        [3.0587, 2.7793, 3.1531],
        [3.047

tensor([[ 4.0373,  3.7046,  4.1256, -0.4370, -0.3923,  0.4720, -0.2735,  0.9040,
          0.5981,  0.4022, -0.7151,  0.0589]])
sigma:  tensor([[4.0373, 3.7046, 4.1256]])
torch.Size([1, 3, 3])
tensor([[ 4.0364,  3.7038,  4.1245,  ...,  0.4020, -0.7149,  0.0589],
        [ 3.8943,  3.5742,  3.9791,  ...,  0.3881, -0.6901,  0.0580],
        [ 3.9500,  3.6251,  4.0365,  ...,  0.3929, -0.6998,  0.0577],
        ...,
        [ 3.8320,  3.5171,  3.9153,  ...,  0.3820, -0.6799,  0.0574],
        [ 4.0373,  3.7046,  4.1256,  ...,  0.4022, -0.7151,  0.0589],
        [ 3.7678,  3.4598,  3.8497,  ...,  0.3761, -0.6683,  0.0570]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[4.0364, 3.7038, 4.1245],
        [3.8943, 3.5742, 3.9791],
        [3.9500, 3.6251, 4.0365],
        [3.8603, 3.5437, 3.9445],
        [3.8603, 3.5437, 3.9445],
        [3.7588, 3.4509, 3.8411],
        [3.7783, 3.4686, 3.8605],
        [3.8496, 3.5340, 3.9335],
        [4.0219, 3.6907, 4.1099],
        [3.6864, 3.3851, 3

tensor([[ 4.0364,  3.7038,  4.1245,  ...,  0.4020, -0.7149,  0.0589],
        [ 3.8943,  3.5742,  3.9791,  ...,  0.3881, -0.6901,  0.0580],
        [ 3.9500,  3.6251,  4.0365,  ...,  0.3929, -0.6998,  0.0577],
        ...,
        [ 3.8320,  3.5171,  3.9153,  ...,  0.3820, -0.6799,  0.0574],
        [ 4.0373,  3.7046,  4.1256,  ...,  0.4022, -0.7151,  0.0589],
        [ 3.7678,  3.4598,  3.8497,  ...,  0.3761, -0.6683,  0.0570]])
sigma:  tensor([[4.0364, 3.7038, 4.1245],
        [3.8943, 3.5742, 3.9791],
        [3.9500, 3.6251, 4.0365],
        [3.8603, 3.5437, 3.9445],
        [3.8603, 3.5437, 3.9445],
        [3.7588, 3.4509, 3.8411],
        [3.7783, 3.4686, 3.8605],
        [3.8496, 3.5340, 3.9335],
        [4.0219, 3.6907, 4.1099],
        [3.6864, 3.3851, 3.7665],
        [3.6513, 3.3542, 3.7312],
        [3.8859, 3.5663, 3.9705],
        [3.8449, 3.5292, 3.9287],
        [4.0335, 3.7010, 4.1215],
        [3.7768, 3.4677, 3.8592],
        [4.0986, 3.7604, 4.1884],
        [3.836

tensor([[ 4.8075,  4.4513,  4.8714, -0.4901, -0.4981,  0.5018, -0.3567,  1.0248,
          0.7042,  0.4419, -0.8630,  0.0372]])
sigma:  tensor([[4.8075, 4.4513, 4.8714]])
torch.Size([1, 3, 3])
tensor([[ 4.8504,  4.4910,  4.9147,  ...,  0.4461, -0.8711,  0.0377],
        [ 4.8824,  4.5204,  4.9473,  ...,  0.4488, -0.8765,  0.0377],
        [ 4.9214,  4.5562,  4.9866,  ...,  0.4523, -0.8834,  0.0378],
        ...,
        [ 4.8068,  4.4512,  4.8707,  ...,  0.4422, -0.8632,  0.0376],
        [ 4.9687,  4.5998,  5.0349,  ...,  0.4566, -0.8917,  0.0377],
        [ 4.6922,  4.3459,  4.7548,  ...,  0.4322, -0.8431,  0.0375]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[4.8504, 4.4910, 4.9147],
        [4.8824, 4.5204, 4.9473],
        [4.9214, 4.5562, 4.9866],
        [4.7200, 4.3708, 4.7824],
        [4.9314, 4.5653, 4.9968],
        [4.6298, 4.2881, 4.6915],
        [4.5611, 4.2248, 4.6214],
        [4.8944, 4.5316, 4.9599],
        [4.8211, 4.4637, 4.8857],
        [4.9307, 4.5650, 4

tensor([[ 4.8504,  4.4910,  4.9147,  ...,  0.4461, -0.8711,  0.0377],
        [ 4.8824,  4.5204,  4.9473,  ...,  0.4488, -0.8765,  0.0377],
        [ 4.9214,  4.5562,  4.9866,  ...,  0.4523, -0.8834,  0.0378],
        ...,
        [ 4.8068,  4.4512,  4.8707,  ...,  0.4422, -0.8632,  0.0376],
        [ 4.9687,  4.5998,  5.0349,  ...,  0.4566, -0.8917,  0.0377],
        [ 4.6922,  4.3459,  4.7548,  ...,  0.4322, -0.8431,  0.0375]])
sigma:  tensor([[4.8504, 4.4910, 4.9147],
        [4.8824, 4.5204, 4.9473],
        [4.9214, 4.5562, 4.9866],
        [4.7200, 4.3708, 4.7824],
        [4.9314, 4.5653, 4.9968],
        [4.6298, 4.2881, 4.6915],
        [4.5611, 4.2248, 4.6214],
        [4.8944, 4.5316, 4.9599],
        [4.8211, 4.4637, 4.8857],
        [4.9307, 4.5650, 4.9966],
        [4.6954, 4.3487, 4.7579],
        [4.7701, 4.4166, 4.8338],
        [4.7031, 4.3558, 4.7655],
        [4.9450, 4.5780, 5.0106],
        [4.6349, 4.2924, 4.6962],
        [4.6725, 4.3270, 4.7350],
        [4.649

tensor([[ 5.9155,  5.5198,  5.9452, -0.5675, -0.6428,  0.5517, -0.4769,  1.2064,
          0.8560,  0.5065, -1.0707,  0.0122]])
sigma:  tensor([[5.9155, 5.5198, 5.9452]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.04    |
| time/              |          |
|    episodes        | 128      |
|    fps             | 1        |
|    time_elapsed    | 69       |
|    total_timesteps | 128      |
| train/             |          |
|    actor_loss      | 4.99     |
|    critic_loss     | 1.79     |
|    ent_coef        | 0.0992   |
|    ent_coef_loss   | -19.3    |
|    learning_rate   | 0.0003   |
|    n_updates       | 27       |
---------------------------------
tensor([[ 5.3981,  5.0396,  5.4252,  ...,  0.4630, -0.9783,  0.0140],
        [ 5.2701,  4.9211,  5.2967,  ...,  0.4525, -0.9554,  0.0148],
        [ 5.9011,  5.5063,  5.9307,  ...,  0.5051, -1.0684,  0.0121],
        ...,
      

tensor([[ 5.3981,  5.0396,  5.4252,  ...,  0.4630, -0.9783,  0.0140],
        [ 5.2701,  4.9211,  5.2967,  ...,  0.4525, -0.9554,  0.0148],
        [ 5.9011,  5.5063,  5.9307,  ...,  0.5051, -1.0684,  0.0121],
        ...,
        [ 6.0261,  5.6225,  6.0565,  ...,  0.5159, -1.0906,  0.0116],
        [ 5.7168,  5.3355,  5.7457,  ...,  0.4902, -1.0355,  0.0124],
        [ 5.5826,  5.2105,  5.6107,  ...,  0.4784, -1.0116,  0.0128]])
sigma:  tensor([[5.3981, 5.0396, 5.4252],
        [5.2701, 4.9211, 5.2967],
        [5.9011, 5.5063, 5.9307],
        [5.9801, 5.5798, 6.0103],
        [5.5002, 5.1348, 5.5282],
        [5.1614, 4.8202, 5.1873],
        [5.5826, 5.2105, 5.6107],
        [6.0375, 5.6331, 6.0680],
        [5.9584, 5.5596, 5.9883],
        [5.9842, 5.5835, 6.0143],
        [5.4249, 5.0647, 5.4524],
        [5.4754, 5.1117, 5.5029],
        [5.8473, 5.4567, 5.8772],
        [5.3668, 5.0111, 5.3942],
        [5.5338, 5.1659, 5.5617],
        [5.3694, 5.0122, 5.3957],
        [5.694

tensor([[ 6.7165,  6.3137,  6.7016, -0.6116, -0.7547,  0.5605, -0.5782,  1.3144,
          0.9598,  0.5406, -1.2215, -0.0173]])
sigma:  tensor([[6.7165, 6.3137, 6.7016]])
torch.Size([1, 3, 3])
tensor([[ 6.4743,  6.0876,  6.4605,  ...,  0.5211, -1.1774, -0.0154],
        [ 6.5172,  6.1267,  6.5026,  ...,  0.5240, -1.1857, -0.0161],
        [ 7.1823,  6.7489,  7.1661,  ...,  0.5770, -1.3050, -0.0206],
        ...,
        [ 6.4106,  6.0271,  6.3962,  ...,  0.5161, -1.1662, -0.0153],
        [ 6.8691,  6.4562,  6.8539,  ...,  0.5526, -1.2488, -0.0188],
        [ 6.5609,  6.1668,  6.5458,  ...,  0.5278, -1.1935, -0.0164]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[6.4743, 6.0876, 6.4605],
        [6.5172, 6.1267, 6.5026],
        [7.1823, 6.7489, 7.1661],
        [6.5172, 6.1267, 6.5026],
        [6.8922, 6.4770, 6.8772],
        [7.1303, 6.7002, 7.1142],
        [6.6219, 6.2246, 6.6070],
        [7.2549, 6.8166, 7.2386],
        [7.0126, 6.5904, 6.9969],
        [7.0854, 6.6584, 7

tensor([[ 6.4743,  6.0876,  6.4605,  ...,  0.5211, -1.1774, -0.0154],
        [ 6.5172,  6.1267,  6.5026,  ...,  0.5240, -1.1857, -0.0161],
        [ 7.1823,  6.7489,  7.1661,  ...,  0.5770, -1.3050, -0.0206],
        ...,
        [ 6.4106,  6.0271,  6.3962,  ...,  0.5161, -1.1662, -0.0153],
        [ 6.8691,  6.4562,  6.8539,  ...,  0.5526, -1.2488, -0.0188],
        [ 6.5609,  6.1668,  6.5458,  ...,  0.5278, -1.1935, -0.0164]])
sigma:  tensor([[6.4743, 6.0876, 6.4605],
        [6.5172, 6.1267, 6.5026],
        [7.1823, 6.7489, 7.1661],
        [6.5172, 6.1267, 6.5026],
        [6.8922, 6.4770, 6.8772],
        [7.1303, 6.7002, 7.1142],
        [6.6219, 6.2246, 6.6070],
        [7.2549, 6.8166, 7.2386],
        [7.0126, 6.5904, 6.9969],
        [7.0854, 6.6584, 7.0696],
        [6.9378, 6.5197, 6.9227],
        [7.0854, 6.6584, 7.0696],
        [6.6921, 6.2907, 6.6774],
        [7.2315, 6.7948, 7.2151],
        [7.1903, 6.7565, 7.1744],
        [6.9433, 6.5252, 6.9277],
        [6.943

tensor([[ 7.8274,  7.4030,  7.7570, -0.6756, -0.9057,  0.5853, -0.7103,  1.4726,
          1.1052,  0.5940, -1.4256, -0.0533]])
sigma:  tensor([[7.8274, 7.4030, 7.7570]])
torch.Size([1, 3, 3])
tensor([[ 8.7520,  8.2739,  8.6740,  ...,  0.6625, -1.5922, -0.0635],
        [ 8.3027,  7.8506,  8.2281,  ...,  0.6289, -1.5111, -0.0582],
        [ 8.1290,  7.6867,  8.0568,  ...,  0.6156, -1.4806, -0.0563],
        ...,
        [ 8.3667,  7.9112,  8.2917,  ...,  0.6339, -1.5226, -0.0585],
        [ 7.6850,  7.2689,  7.6164,  ...,  0.5827, -1.4002, -0.0516],
        [ 7.8715,  7.4446,  7.8010,  ...,  0.5969, -1.4338, -0.0537]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[8.7520, 8.2739, 8.6740],
        [8.3027, 7.8506, 8.2281],
        [8.1290, 7.6867, 8.0568],
        [8.0963, 7.6557, 8.0236],
        [7.9731, 7.5406, 7.9016],
        [8.0136, 7.5780, 7.9416],
        [8.5629, 8.0958, 8.4860],
        [7.3695, 6.9723, 7.3040],
        [7.5550, 7.1465, 7.4876],
        [8.5818, 8.1136, 8

tensor([[ 8.7520,  8.2739,  8.6740,  ...,  0.6625, -1.5922, -0.0635],
        [ 8.3027,  7.8506,  8.2281,  ...,  0.6289, -1.5111, -0.0582],
        [ 8.1290,  7.6867,  8.0568,  ...,  0.6156, -1.4806, -0.0563],
        ...,
        [ 8.3667,  7.9112,  8.2917,  ...,  0.6339, -1.5226, -0.0585],
        [ 7.6850,  7.2689,  7.6164,  ...,  0.5827, -1.4002, -0.0516],
        [ 7.8715,  7.4446,  7.8010,  ...,  0.5969, -1.4338, -0.0537]])
sigma:  tensor([[8.7520, 8.2739, 8.6740],
        [8.3027, 7.8506, 8.2281],
        [8.1290, 7.6867, 8.0568],
        [8.0963, 7.6557, 8.0236],
        [7.9731, 7.5406, 7.9016],
        [8.0136, 7.5780, 7.9416],
        [8.5629, 8.0958, 8.4860],
        [7.3695, 6.9723, 7.3040],
        [7.5550, 7.1465, 7.4876],
        [8.5818, 8.1136, 8.5049],
        [8.1908, 7.7454, 8.1174],
        [7.8715, 7.4446, 7.8010],
        [8.5883, 8.1196, 8.5111],
        [8.2050, 7.7588, 8.1315],
        [8.4746, 8.0128, 8.3988],
        [8.5629, 8.0958, 8.4860],
        [8.151

tensor([[ 9.1316,  8.6804,  8.9951, -0.7478, -1.0853,  0.6120, -0.8664,  1.6548,
          1.2704,  0.6541, -1.6638, -0.0956]])
sigma:  tensor([[9.1316, 8.6804, 8.9951]])
torch.Size([1, 3, 3])
tensor([[10.1398,  9.6347,  9.9879,  ...,  0.7244, -1.8458, -0.1092],
        [ 9.6530,  9.1733,  9.5089,  ...,  0.6899, -1.7584, -0.1030],
        [ 9.8949,  9.4029,  9.7468,  ...,  0.7071, -1.8013, -0.1064],
        ...,
        [ 9.1870,  8.7329,  9.0497,  ...,  0.6574, -1.6734, -0.0962],
        [10.0885,  9.5862,  9.9375,  ...,  0.7208, -1.8366, -0.1085],
        [ 9.9108,  9.4182,  9.7626,  ...,  0.7084, -1.8043, -0.1058]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[10.1398,  9.6347,  9.9879],
        [ 9.6530,  9.1733,  9.5089],
        [ 9.8949,  9.4029,  9.7468],
        [ 9.1870,  8.7329,  9.0497],
        [ 9.6915,  9.2105,  9.5464],
        [ 9.4332,  8.9662,  9.2923],
        [ 9.8008,  9.3135,  9.6539],
        [ 9.8949,  9.4029,  9.7468],
        [ 9.6000,  9.1237,  9.4566],

tensor([[10.1398,  9.6347,  9.9879,  ...,  0.7244, -1.8458, -0.1092],
        [ 9.6530,  9.1733,  9.5089,  ...,  0.6899, -1.7584, -0.1030],
        [ 9.8949,  9.4029,  9.7468,  ...,  0.7071, -1.8013, -0.1064],
        ...,
        [ 9.1870,  8.7329,  9.0497,  ...,  0.6574, -1.6734, -0.0962],
        [10.0885,  9.5862,  9.9375,  ...,  0.7208, -1.8366, -0.1085],
        [ 9.9108,  9.4182,  9.7626,  ...,  0.7084, -1.8043, -0.1058]])
sigma:  tensor([[10.1398,  9.6347,  9.9879],
        [ 9.6530,  9.1733,  9.5089],
        [ 9.8949,  9.4029,  9.7468],
        [ 9.1870,  8.7329,  9.0497],
        [ 9.6915,  9.2105,  9.5464],
        [ 9.4332,  8.9662,  9.2923],
        [ 9.8008,  9.3135,  9.6539],
        [ 9.8949,  9.4029,  9.7468],
        [ 9.6000,  9.1237,  9.4566],
        [10.2623,  9.7509, 10.1090],
        [ 9.0821,  8.6331,  8.9459],
        [10.1999,  9.6916, 10.0473],
        [ 9.3532,  8.8904,  9.2131],
        [ 9.1213,  8.6716,  8.9855],
        [ 9.0165,  8.5711,  8.8811],
   

tensor([[11.6540, 11.1255, 11.4228, -0.8998, -1.4157,  0.7009, -1.1494,  2.0436,
          1.5988,  0.7867, -2.1227, -0.1623]])
sigma:  tensor([[11.6540, 11.1255, 11.4228]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.09    |
| time/              |          |
|    episodes        | 132      |
|    fps             | 1        |
|    time_elapsed    | 79       |
|    total_timesteps | 132      |
| train/             |          |
|    actor_loss      | 3.95     |
|    critic_loss     | 2.78     |
|    ent_coef        | 0.099    |
|    ent_coef_loss   | -22.6    |
|    learning_rate   | 0.0003   |
|    n_updates       | 31       |
---------------------------------
tensor([[11.5653, 11.0411, 11.3349,  ...,  0.7811, -2.1063, -0.1602],
        [11.6576, 11.1293, 11.4256,  ...,  0.7874, -2.1229, -0.1614],
        [11.4024, 10.8856, 11.1757,  ...,  0.7699, -2.0774, -0.1581],
        ...,
   

tensor([[11.5653, 11.0411, 11.3349,  ...,  0.7811, -2.1063, -0.1602],
        [11.6576, 11.1293, 11.4256,  ...,  0.7874, -2.1229, -0.1614],
        [11.4024, 10.8856, 11.1757,  ...,  0.7699, -2.0774, -0.1581],
        ...,
        [11.0687, 10.5682, 10.8482,  ...,  0.7481, -2.0168, -0.1525],
        [11.0018, 10.5047, 10.7829,  ...,  0.7434, -2.0047, -0.1509],
        [12.0352, 11.4884, 11.7956,  ...,  0.8125, -2.1914, -0.1679]])
sigma:  tensor([[11.5653, 11.0411, 11.3349],
        [11.6576, 11.1293, 11.4256],
        [11.4024, 10.8856, 11.1757],
        [10.7974, 10.3105, 10.5830],
        [11.3345, 10.8217, 11.1086],
        [10.9053, 10.4129, 10.6879],
        [10.4206,  9.9520, 10.2126],
        [11.3395, 10.8256, 11.1140],
        [11.4760, 10.9556, 11.2478],
        [11.6576, 11.1293, 11.4256],
        [11.9903, 11.4457, 11.7518],
        [10.5235, 10.0496, 10.3135],
        [11.0665, 10.5666, 10.8459],
        [11.9794, 11.4353, 11.7409],
        [11.3564, 10.8427, 11.1303],
   

tensor([[13.8708, 13.2960, 13.5355, -1.0158, -1.7173,  0.7454, -1.4139,  2.3589,
          1.8783,  0.8927, -2.5263, -0.2317]])
sigma:  tensor([[13.8708, 13.2960, 13.5355]])
torch.Size([1, 3, 3])
tensor([[13.0176, 12.4799, 12.7036,  ...,  0.8384, -2.3721, -0.2155],
        [13.4959, 12.9372, 13.1697,  ...,  0.8687, -2.4589, -0.2249],
        [13.3833, 12.8293, 13.0603,  ...,  0.8612, -2.4384, -0.2232],
        ...,
        [14.0619, 13.4787, 13.7220,  ...,  0.9045, -2.5608, -0.2353],
        [12.9402, 12.4057, 12.6273,  ...,  0.8337, -2.3583, -0.2145],
        [12.5895, 12.0710, 12.2857,  ...,  0.8113, -2.2941, -0.2078]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[13.0176, 12.4799, 12.7036],
        [13.4959, 12.9372, 13.1697],
        [13.3833, 12.8293, 13.0603],
        [13.3833, 12.8293, 13.0603],
        [13.9404, 13.3627, 13.6036],
        [13.0190, 12.4812, 12.7051],
        [12.3412, 11.8324, 12.0425],
        [14.0018, 13.4212, 13.6633],
        [13.2132, 12.6679, 12.895

tensor([[13.0176, 12.4799, 12.7036,  ...,  0.8384, -2.3721, -0.2155],
        [13.4959, 12.9372, 13.1697,  ...,  0.8687, -2.4589, -0.2249],
        [13.3833, 12.8293, 13.0603,  ...,  0.8612, -2.4384, -0.2232],
        ...,
        [14.0619, 13.4787, 13.7220,  ...,  0.9045, -2.5608, -0.2353],
        [12.9402, 12.4057, 12.6273,  ...,  0.8337, -2.3583, -0.2145],
        [12.5895, 12.0710, 12.2857,  ...,  0.8113, -2.2941, -0.2078]])
sigma:  tensor([[13.0176, 12.4799, 12.7036],
        [13.4959, 12.9372, 13.1697],
        [13.3833, 12.8293, 13.0603],
        [13.3833, 12.8293, 13.0603],
        [13.9404, 13.3627, 13.6036],
        [13.0190, 12.4812, 12.7051],
        [12.3412, 11.8324, 12.0425],
        [14.0018, 13.4212, 13.6633],
        [13.2132, 12.6679, 12.8952],
        [12.5996, 12.0795, 12.2946],
        [12.8477, 12.3169, 12.5371],
        [12.2150, 11.7124, 11.9199],
        [12.0301, 11.5356, 11.7398],
        [12.9018, 12.3692, 12.5901],
        [14.0332, 13.4512, 13.6938],
   

tensor([[16.4097, 15.7895, 15.9522, -1.1409, -2.0674,  0.7856, -1.7182,  2.7128,
          2.1982,  1.0118, -2.9880, -0.3158]])
sigma:  tensor([[16.4097, 15.7895, 15.9522]])
torch.Size([1, 3, 3])
tensor([[16.0628, 15.4562, 15.6147,  ...,  0.9908, -2.9253, -0.3082],
        [16.2114, 15.5988, 15.7591,  ...,  0.9998, -2.9522, -0.3113],
        [16.1872, 15.5757, 15.7358,  ...,  0.9982, -2.9477, -0.3108],
        ...,
        [16.1036, 15.4952, 15.6544,  ...,  0.9932, -2.9327, -0.3091],
        [14.7403, 14.1859, 14.3290,  ...,  0.9104, -2.6862, -0.2811],
        [14.9048, 14.3432, 14.4888,  ...,  0.9205, -2.7158, -0.2844]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[16.0628, 15.4562, 15.6147],
        [16.2114, 15.5988, 15.7591],
        [16.1872, 15.5757, 15.7358],
        [15.7467, 15.1523, 15.3072],
        [16.1602, 15.5496, 15.7094],
        [14.9860, 14.4217, 14.5678],
        [14.7403, 14.1859, 14.3290],
        [16.1893, 15.5778, 15.7380],
        [16.1893, 15.5778, 15.738

tensor([[16.0628, 15.4562, 15.6147,  ...,  0.9908, -2.9253, -0.3082],
        [16.2114, 15.5988, 15.7591,  ...,  0.9998, -2.9522, -0.3113],
        [16.1872, 15.5757, 15.7358,  ...,  0.9982, -2.9477, -0.3108],
        ...,
        [16.1036, 15.4952, 15.6544,  ...,  0.9932, -2.9327, -0.3091],
        [14.7403, 14.1859, 14.3290,  ...,  0.9104, -2.6862, -0.2811],
        [14.9048, 14.3432, 14.4888,  ...,  0.9205, -2.7158, -0.2844]])
sigma:  tensor([[16.0628, 15.4562, 15.6147],
        [16.2114, 15.5988, 15.7591],
        [16.1872, 15.5757, 15.7358],
        [15.7467, 15.1523, 15.3072],
        [16.1602, 15.5496, 15.7094],
        [14.9860, 14.4217, 14.5678],
        [14.7403, 14.1859, 14.3290],
        [16.1893, 15.5778, 15.7380],
        [16.1893, 15.5778, 15.7380],
        [14.2286, 13.6953, 13.8326],
        [16.2783, 15.6631, 15.8242],
        [14.8114, 14.2541, 14.3984],
        [16.3287, 15.7114, 15.8731],
        [14.1530, 13.6221, 13.7585],
        [16.3287, 15.7114, 15.8731],
   

tensor([[18.0781, 17.4590, 17.5117, -1.2013, -2.3119,  0.7674, -1.9341,  2.9106,
          2.3963,  1.0698, -3.2910, -0.3835]])
sigma:  tensor([[18.0781, 17.4590, 17.5117]])
torch.Size([1, 3, 3])
tensor([[18.1677, 17.5452, 17.5990,  ...,  1.0746, -3.3071, -0.3862],
        [18.2912, 17.6644, 17.7183,  ...,  1.0819, -3.3290, -0.3885],
        [17.6963, 17.0913, 17.1420,  ...,  1.0480, -3.2215, -0.3749],
        ...,
        [17.1960, 16.6076, 16.6568,  ...,  1.0182, -3.1310, -0.3640],
        [18.8184, 18.1726, 18.2285,  ...,  1.1129, -3.4246, -0.4003],
        [18.2516, 17.6258, 17.6795,  ...,  1.0798, -3.3222, -0.3879]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[18.1677, 17.5452, 17.5990],
        [18.2912, 17.6644, 17.7183],
        [17.6963, 17.0913, 17.1420],
        [18.7640, 18.1200, 18.1756],
        [16.7881, 16.2153, 16.2622],
        [17.4543, 16.8569, 16.9071],
        [17.5610, 16.9605, 17.0107],
        [17.0199, 16.4390, 16.4863],
        [18.7376, 18.0948, 18.150

tensor([[18.1677, 17.5452, 17.5990,  ...,  1.0746, -3.3071, -0.3862],
        [18.2912, 17.6644, 17.7183,  ...,  1.0819, -3.3290, -0.3885],
        [17.6963, 17.0913, 17.1420,  ...,  1.0480, -3.2215, -0.3749],
        ...,
        [17.1960, 16.6076, 16.6568,  ...,  1.0182, -3.1310, -0.3640],
        [18.8184, 18.1726, 18.2285,  ...,  1.1129, -3.4246, -0.4003],
        [18.2516, 17.6258, 17.6795,  ...,  1.0798, -3.3222, -0.3879]])
sigma:  tensor([[18.1677, 17.5452, 17.5990],
        [18.2912, 17.6644, 17.7183],
        [17.6963, 17.0913, 17.1420],
        [18.7640, 18.1200, 18.1756],
        [16.7881, 16.2153, 16.2622],
        [17.4543, 16.8569, 16.9071],
        [17.5610, 16.9605, 17.0107],
        [17.0199, 16.4390, 16.4863],
        [18.7376, 18.0948, 18.1504],
        [17.8110, 17.2017, 17.2528],
        [18.0781, 17.4590, 17.5117],
        [17.3688, 16.7756, 16.8245],
        [18.2912, 17.6644, 17.7183],
        [16.8724, 16.2966, 16.3432],
        [19.0718, 18.4172, 18.4746],
   

tensor([[19.2358, 18.6413, 18.5712, -1.2258, -2.4919,  0.7211, -2.0935,  3.0201,
          2.5242,  1.0956, -3.4977, -0.4405]])
sigma:  tensor([[19.2358, 18.6413, 18.5712]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.11    |
| time/              |          |
|    episodes        | 136      |
|    fps             | 1        |
|    time_elapsed    | 89       |
|    total_timesteps | 136      |
| train/             |          |
|    actor_loss      | 4.56     |
|    critic_loss     | 2.58     |
|    ent_coef        | 0.0989   |
|    ent_coef_loss   | -25.2    |
|    learning_rate   | 0.0003   |
|    n_updates       | 35       |
---------------------------------
tensor([[19.1608, 18.5693, 18.4991,  ...,  1.0911, -3.4844, -0.4386],
        [20.0466, 19.4269, 19.3552,  ...,  1.1415, -3.6444, -0.4603],
        [20.5868, 19.9496, 19.8764,  ...,  1.1715, -3.7421, -0.4726],
        ...,
   

tensor([[19.1608, 18.5693, 18.4991,  ...,  1.0911, -3.4844, -0.4386],
        [20.0466, 19.4269, 19.3552,  ...,  1.1415, -3.6444, -0.4603],
        [20.5868, 19.9496, 19.8764,  ...,  1.1715, -3.7421, -0.4726],
        ...,
        [18.9388, 18.3540, 18.2845,  ...,  1.0789, -3.4438, -0.4332],
        [20.8464, 20.2004, 20.1273,  ...,  1.1857, -3.7892, -0.4798],
        [19.7664, 19.1552, 19.0837,  ...,  1.1255, -3.5940, -0.4531]])
sigma:  tensor([[19.1608, 18.5693, 18.4991],
        [20.0466, 19.4269, 19.3552],
        [20.5868, 19.9496, 19.8764],
        [21.6552, 20.9831, 20.9075],
        [19.4439, 18.8433, 18.7723],
        [21.7035, 21.0297, 20.9540],
        [21.6149, 20.9441, 20.8685],
        [18.9986, 18.4126, 18.3427],
        [21.7479, 21.0730, 20.9972],
        [19.3332, 18.7365, 18.6653],
        [19.0312, 18.4444, 18.3741],
        [20.2653, 19.6384, 19.5654],
        [20.3011, 19.6734, 19.6005],
        [21.4663, 20.8003, 20.7250],
        [20.0054, 19.3863, 19.3146],
   

tensor([[24.2313, 23.5543, 23.3242, -1.4744, -3.1778,  0.7929, -2.6819,  3.7192,
          3.1518,  1.3293, -4.3949, -0.5951]])
sigma:  tensor([[24.2313, 23.5543, 23.3242]])
torch.Size([1, 3, 3])
tensor([[24.5343, 23.8481, 23.6156,  ...,  1.3454, -4.4495, -0.6024],
        [22.8911, 22.2533, 22.0347,  ...,  1.2567, -4.1529, -0.5609],
        [22.1644, 21.5473, 21.3343,  ...,  1.2168, -4.0217, -0.5421],
        ...,
        [22.3630, 21.7404, 21.5257,  ...,  1.2280, -4.0577, -0.5469],
        [24.5099, 23.8245, 23.5923,  ...,  1.3438, -4.4448, -0.6024],
        [21.4723, 20.8759, 20.6682,  ...,  1.1798, -3.8961, -0.5242]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[24.5343, 23.8481, 23.6156],
        [22.8911, 22.2533, 22.0347],
        [22.1644, 21.5473, 21.3343],
        [24.5265, 23.8408, 23.6084],
        [22.3630, 21.7404, 21.5257],
        [24.0950, 23.4219, 23.1929],
        [23.1800, 22.5340, 22.3121],
        [22.9060, 22.2675, 22.0490],
        [21.3192, 20.7271, 20.521

tensor([[24.5343, 23.8481, 23.6156,  ...,  1.3454, -4.4495, -0.6024],
        [22.8911, 22.2533, 22.0347,  ...,  1.2567, -4.1529, -0.5609],
        [22.1644, 21.5473, 21.3343,  ...,  1.2168, -4.0217, -0.5421],
        ...,
        [22.3630, 21.7404, 21.5257,  ...,  1.2280, -4.0577, -0.5469],
        [24.5099, 23.8245, 23.5923,  ...,  1.3438, -4.4448, -0.6024],
        [21.4723, 20.8759, 20.6682,  ...,  1.1798, -3.8961, -0.5242]])
sigma:  tensor([[24.5343, 23.8481, 23.6156],
        [22.8911, 22.2533, 22.0347],
        [22.1644, 21.5473, 21.3343],
        [24.5265, 23.8408, 23.6084],
        [22.3630, 21.7404, 21.5257],
        [24.0950, 23.4219, 23.1929],
        [23.1800, 22.5340, 22.3121],
        [22.9060, 22.2675, 22.0490],
        [21.3192, 20.7271, 20.5213],
        [23.9802, 23.3107, 23.0826],
        [24.1598, 23.4844, 23.2551],
        [21.5929, 20.9920, 20.7839],
        [21.9360, 21.3264, 21.1153],
        [23.5817, 22.9237, 22.6989],
        [24.7509, 24.0585, 23.8242],
   

tensor([[27.6032, 26.9031, 26.4939, -1.6097, -3.6571,  0.7941, -3.0879,  4.1483,
          3.5637,  1.4656, -4.9894, -0.7189]])
sigma:  tensor([[27.6032, 26.9031, 26.4939]])
torch.Size([1, 3, 3])
tensor([[26.0263, 25.3681, 24.9806,  ...,  1.3833, -4.7053, -0.6765],
        [24.9454, 24.3162, 23.9433,  ...,  1.3262, -4.5102, -0.6466],
        [28.0200, 27.3089, 26.8938,  ...,  1.4877, -5.0646, -0.7301],
        ...,
        [26.0166, 25.3591, 24.9715,  ...,  1.3825, -4.7032, -0.6753],
        [28.1220, 27.4083, 26.9918,  ...,  1.4930, -5.0828, -0.7328],
        [26.7176, 26.0407, 25.6436,  ...,  1.4191, -4.8296, -0.6950]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[26.0263, 25.3681, 24.9806],
        [24.9454, 24.3162, 23.9433],
        [28.0200, 27.3089, 26.8938],
        [27.5511, 26.8525, 26.4437],
        [24.6277, 24.0064, 23.6373],
        [24.7458, 24.1223, 23.7517],
        [26.0324, 25.3741, 24.9860],
        [23.8271, 23.2264, 22.8683],
        [27.5511, 26.8525, 26.443

tensor([[26.0263, 25.3681, 24.9806,  ...,  1.3833, -4.7053, -0.6765],
        [24.9454, 24.3162, 23.9433,  ...,  1.3262, -4.5102, -0.6466],
        [28.0200, 27.3089, 26.8938,  ...,  1.4877, -5.0646, -0.7301],
        ...,
        [26.0166, 25.3591, 24.9715,  ...,  1.3825, -4.7032, -0.6753],
        [28.1220, 27.4083, 26.9918,  ...,  1.4930, -5.0828, -0.7328],
        [26.7176, 26.0407, 25.6436,  ...,  1.4191, -4.8296, -0.6950]])
sigma:  tensor([[26.0263, 25.3681, 24.9806],
        [24.9454, 24.3162, 23.9433],
        [28.0200, 27.3089, 26.8938],
        [27.5511, 26.8525, 26.4437],
        [24.6277, 24.0064, 23.6373],
        [24.7458, 24.1223, 23.7517],
        [26.0324, 25.3741, 24.9860],
        [23.8271, 23.2264, 22.8683],
        [27.5511, 26.8525, 26.4437],
        [27.7930, 27.0880, 26.6762],
        [25.1560, 24.5202, 24.1444],
        [25.8444, 25.1913, 24.8056],
        [26.4819, 25.8110, 25.4177],
        [24.7458, 24.1223, 23.7517],
        [27.2899, 26.5986, 26.1934],
   

tensor([[28.9234, 28.2527, 27.6824, -1.6203, -3.8561,  0.7227, -3.2607,  4.2697,
          3.7012,  1.4954, -5.2136, -0.7848]])
sigma:  tensor([[28.9234, 28.2527, 27.6824]])
torch.Size([1, 3, 3])
tensor([[31.8781, 31.1369, 30.5110,  ...,  1.6467, -5.7444, -0.8673],
        [31.9991, 31.2550, 30.6271,  ...,  1.6530, -5.7661, -0.8706],
        [31.2751, 30.5486, 29.9339,  ...,  1.6159, -5.6359, -0.8501],
        ...,
        [27.8269, 27.1835, 26.6332,  ...,  1.4392, -5.0164, -0.7539],
        [30.5034, 29.7952, 29.1952,  ...,  1.5764, -5.4977, -0.8289],
        [31.1309, 30.4076, 29.7959,  ...,  1.6083, -5.6099, -0.8468]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[31.8781, 31.1369, 30.5110],
        [31.9991, 31.2550, 30.6271],
        [31.2751, 30.5486, 29.9339],
        [30.3966, 29.6914, 29.0930],
        [29.2550, 28.5778, 28.0003],
        [28.6769, 28.0140, 27.4474],
        [31.8211, 31.0812, 30.4565],
        [28.8579, 28.1891, 27.6199],
        [30.7186, 30.0055, 29.401

tensor([[31.8781, 31.1369, 30.5110,  ...,  1.6467, -5.7444, -0.8673],
        [31.9991, 31.2550, 30.6271,  ...,  1.6530, -5.7661, -0.8706],
        [31.2751, 30.5486, 29.9339,  ...,  1.6159, -5.6359, -0.8501],
        ...,
        [27.8269, 27.1835, 26.6332,  ...,  1.4392, -5.0164, -0.7539],
        [30.5034, 29.7952, 29.1952,  ...,  1.5764, -5.4977, -0.8289],
        [31.1309, 30.4076, 29.7959,  ...,  1.6083, -5.6099, -0.8468]])
sigma:  tensor([[31.8781, 31.1369, 30.5110],
        [31.9991, 31.2550, 30.6271],
        [31.2751, 30.5486, 29.9339],
        [30.3966, 29.6914, 29.0930],
        [29.2550, 28.5778, 28.0003],
        [28.6769, 28.0140, 27.4474],
        [31.8211, 31.0812, 30.4565],
        [28.8579, 28.1891, 27.6199],
        [30.7186, 30.0055, 29.4019],
        [28.1082, 27.4573, 26.9019],
        [31.7166, 30.9794, 30.3566],
        [31.6160, 30.8812, 30.2603],
        [29.5248, 28.8403, 28.2592],
        [28.7223, 28.0578, 27.4903],
        [28.3823, 27.7260, 27.1652],
   

tensor([[33.5189, 32.8088, 31.9978, -1.8004, -4.4935,  0.7180, -3.8072,  4.8665,
          4.2558,  1.6921, -6.0200, -0.9435]])
sigma:  tensor([[33.5189, 32.8088, 31.9978]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.1     |
| time/              |          |
|    episodes        | 140      |
|    fps             | 1        |
|    time_elapsed    | 100      |
|    total_timesteps | 140      |
| train/             |          |
|    actor_loss      | 5.99     |
|    critic_loss     | 1.75     |
|    ent_coef        | 0.0987   |
|    ent_coef_loss   | -27.1    |
|    learning_rate   | 0.0003   |
|    n_updates       | 39       |
---------------------------------
tensor([[34.5879, 33.8543, 33.0186,  ...,  1.7455, -6.2115, -0.9742],
        [32.0179, 31.3413, 30.5649,  ...,  1.6167, -5.7502, -0.8995],
        [34.6619, 33.9267, 33.0898,  ...,  1.7485, -6.2244, -0.9770],
        ...,
   

tensor([[34.5879, 33.8543, 33.0186,  ...,  1.7455, -6.2115, -0.9742],
        [32.0179, 31.3413, 30.5649,  ...,  1.6167, -5.7502, -0.8995],
        [34.6619, 33.9267, 33.0898,  ...,  1.7485, -6.2244, -0.9770],
        ...,
        [31.3609, 30.6987, 29.9372,  ...,  1.5843, -5.6326, -0.8808],
        [30.6197, 29.9741, 29.2292,  ...,  1.5474, -5.5001, -0.8594],
        [32.5400, 31.8515, 31.0633,  ...,  1.6430, -5.8441, -0.9153]])
sigma:  tensor([[34.5879, 33.8543, 33.0186],
        [32.0179, 31.3413, 30.5649],
        [34.6619, 33.9267, 33.0898],
        [35.7891, 35.0289, 34.1653],
        [32.6437, 31.9532, 31.1624],
        [34.5662, 33.8331, 32.9979],
        [33.5897, 32.8781, 32.0654],
        [36.0903, 35.3235, 34.4529],
        [33.7105, 32.9957, 32.1812],
        [34.3098, 33.5823, 32.7528],
        [32.5474, 31.8599, 31.0710],
        [31.8806, 31.2068, 30.4330],
        [33.6324, 32.9204, 32.1065],
        [36.1259, 35.3583, 34.4870],
        [33.8818, 33.1640, 32.3441],
   

tensor([[39.5257, 38.7612, 37.6598, -2.0345, -5.3130,  0.7058, -4.5238,  5.6379,
          4.9535,  1.9511, -7.0677, -1.1722]])
sigma:  tensor([[39.5257, 38.7612, 37.6598]])
torch.Size([1, 3, 3])
tensor([[40.2793, 39.4997, 38.3772,  ...,  1.9884, -7.2022, -1.1945],
        [38.0773, 37.3421, 36.2787,  ...,  1.8810, -6.8089, -1.1277],
        [38.5841, 37.8386, 36.7618,  ...,  1.9056, -6.8997, -1.1428],
        ...,
        [36.8078, 36.0984, 35.0691,  ...,  1.8189, -6.5821, -1.0888],
        [35.9341, 35.2418, 34.2368,  ...,  1.7749, -6.4260, -1.0624],
        [40.6348, 39.8481, 38.7160,  ...,  2.0058, -7.2654, -1.2053]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[40.2793, 39.4997, 38.3772],
        [38.0773, 37.3421, 36.2787],
        [38.5841, 37.8386, 36.7618],
        [37.9367, 37.2044, 36.1447],
        [37.6974, 36.9693, 35.9169],
        [36.7064, 35.9990, 34.9723],
        [38.8706, 38.1189, 37.0353],
        [39.5257, 38.7612, 37.6598],
        [36.6024, 35.8957, 34.873

tensor([[40.2793, 39.4997, 38.3772,  ...,  1.9884, -7.2022, -1.1945],
        [38.0773, 37.3421, 36.2787,  ...,  1.8810, -6.8089, -1.1277],
        [38.5841, 37.8386, 36.7618,  ...,  1.9056, -6.8997, -1.1428],
        ...,
        [36.8078, 36.0984, 35.0691,  ...,  1.8189, -6.5821, -1.0888],
        [35.9341, 35.2418, 34.2368,  ...,  1.7749, -6.4260, -1.0624],
        [40.6348, 39.8481, 38.7160,  ...,  2.0058, -7.2654, -1.2053]])
sigma:  tensor([[40.2793, 39.4997, 38.3772],
        [38.0773, 37.3421, 36.2787],
        [38.5841, 37.8386, 36.7618],
        [37.9367, 37.2044, 36.1447],
        [37.6974, 36.9693, 35.9169],
        [36.7064, 35.9990, 34.9723],
        [38.8706, 38.1189, 37.0353],
        [39.5257, 38.7612, 37.6598],
        [36.6024, 35.8957, 34.8731],
        [39.6141, 38.8481, 37.7433],
        [35.8290, 35.1389, 34.1366],
        [36.5808, 35.8766, 34.8538],
        [40.9414, 40.1486, 39.0086],
        [36.6024, 35.8957, 34.8731],
        [38.9179, 38.1656, 37.0800],
   

tensor([[42.8630, 42.1101, 40.7729, -2.1191, -5.7681,  0.6221, -4.9374,  6.0101,
          5.2973,  2.0769, -7.6254, -1.3271]])
sigma:  tensor([[42.8630, 42.1101, 40.7729]])
torch.Size([1, 3, 3])
tensor([[44.6048, 43.8209, 42.4295,  ...,  2.1607, -7.9338, -1.3826],
        [39.1861, 38.5002, 37.2738,  ...,  1.9000, -6.9721, -1.2110],
        [45.2205, 44.4251, 43.0151,  ...,  2.1905, -8.0435, -1.4013],
        ...,
        [43.8241, 43.0542, 41.6874,  ...,  2.1231, -7.7957, -1.3577],
        [45.2131, 44.4179, 43.0080,  ...,  2.1899, -8.0419, -1.4014],
        [41.2131, 40.4904, 39.2026,  ...,  1.9975, -7.3323, -1.2750]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[44.6048, 43.8209, 42.4295],
        [39.1861, 38.5002, 37.2738],
        [45.2205, 44.4251, 43.0151],
        [44.8856, 44.0963, 42.6966],
        [39.7185, 39.0227, 37.7802],
        [44.4607, 43.6796, 42.2923],
        [44.6224, 43.8378, 42.4459],
        [40.0631, 39.3619, 38.1088],
        [44.3920, 43.6120, 42.226

tensor([[44.6048, 43.8209, 42.4295,  ...,  2.1607, -7.9338, -1.3826],
        [39.1861, 38.5002, 37.2738,  ...,  1.9000, -6.9721, -1.2110],
        [45.2205, 44.4251, 43.0151,  ...,  2.1905, -8.0435, -1.4013],
        ...,
        [43.8241, 43.0542, 41.6874,  ...,  2.1231, -7.7957, -1.3577],
        [45.2131, 44.4179, 43.0080,  ...,  2.1899, -8.0419, -1.4014],
        [41.2131, 40.4904, 39.2026,  ...,  1.9975, -7.3323, -1.2750]])
sigma:  tensor([[44.6048, 43.8209, 42.4295],
        [39.1861, 38.5002, 37.2738],
        [45.2205, 44.4251, 43.0151],
        [44.8856, 44.0963, 42.6966],
        [39.7185, 39.0227, 37.7802],
        [44.4607, 43.6796, 42.2923],
        [44.6224, 43.8378, 42.4459],
        [40.0631, 39.3619, 38.1088],
        [44.3920, 43.6120, 42.2268],
        [41.2131, 40.4904, 39.2026],
        [41.3904, 40.6642, 39.3710],
        [37.5401, 36.8852, 35.7083],
        [41.8814, 41.1472, 39.8388],
        [39.0844, 38.4019, 37.1782],
        [45.3927, 44.5941, 43.1788],
   

tensor([[44.1556, 43.4670, 41.9517, -2.0884, -5.9395,  0.5081, -5.1271,  6.1047,
          5.3777,  2.1125, -7.7988, -1.4178]])
sigma:  tensor([[44.1556, 43.4670, 41.9517]])
torch.Size([1, 3, 3])
tensor([[45.7620, 45.0481, 43.4788,  ...,  2.1892, -8.0821, -1.4698],
        [47.3449, 46.6036, 44.9825,  ...,  2.2638, -8.3620, -1.5227],
        [50.5743, 49.7810, 48.0509,  ...,  2.4177, -8.9309, -1.6280],
        ...,
        [48.1052, 47.3520, 45.7046,  ...,  2.3001, -8.4953, -1.5475],
        [43.9420, 43.2563, 41.7485,  ...,  2.1021, -7.7619, -1.4110],
        [45.2263, 44.5207, 42.9690,  ...,  2.1639, -7.9880, -1.4527]],
       grad_fn=<AddmmBackward0>)
sigma:  tensor([[45.7620, 45.0481, 43.4788],
        [47.3449, 46.6036, 44.9825],
        [50.5743, 49.7810, 48.0509],
        [50.1697, 49.3830, 47.6666],
        [47.0984, 46.3614, 44.7484],
        [43.2682, 42.5938, 41.1082],
        [45.5700, 44.8580, 43.2954],
        [44.2713, 43.5798, 42.0608],
        [44.6382, 43.9415, 42.409

tensor([[45.7620, 45.0481, 43.4788,  ...,  2.1892, -8.0821, -1.4698],
        [47.3449, 46.6036, 44.9825,  ...,  2.2638, -8.3620, -1.5227],
        [50.5743, 49.7810, 48.0509,  ...,  2.4177, -8.9309, -1.6280],
        ...,
        [48.1052, 47.3520, 45.7046,  ...,  2.3001, -8.4953, -1.5475],
        [43.9420, 43.2563, 41.7485,  ...,  2.1021, -7.7619, -1.4110],
        [45.2263, 44.5207, 42.9690,  ...,  2.1639, -7.9880, -1.4527]])
sigma:  tensor([[45.7620, 45.0481, 43.4788],
        [47.3449, 46.6036, 44.9825],
        [50.5743, 49.7810, 48.0509],
        [50.1697, 49.3830, 47.6666],
        [47.0984, 46.3614, 44.7484],
        [43.2682, 42.5938, 41.1082],
        [45.5700, 44.8580, 43.2954],
        [44.2713, 43.5798, 42.0608],
        [44.6382, 43.9415, 42.4093],
        [47.0553, 46.3200, 44.7077],
        [42.4190, 41.7580, 40.3004],
        [47.0984, 46.3614, 44.7484],
        [43.8044, 43.1218, 41.6184],
        [44.9183, 44.2171, 42.6764],
        [49.6558, 48.8778, 47.1781],
   

tensor([[50.4787, 49.7866, 47.9161, -2.2741, -6.7827,  0.4379, -5.9095,  6.8910,
          6.0589,  2.3917, -8.8440, -1.6779]])
sigma:  tensor([[50.4787, 49.7866, 47.9161]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.12    |
| time/              |          |
|    episodes        | 144      |
|    fps             | 1        |
|    time_elapsed    | 110      |
|    total_timesteps | 144      |
| train/             |          |
|    actor_loss      | 6.22     |
|    critic_loss     | 1.94     |
|    ent_coef        | 0.0986   |
|    ent_coef_loss   | -28.8    |
|    learning_rate   | 0.0003   |
|    n_updates       | 43       |
---------------------------------
tensor([[56.3638, 55.5863, 53.5023,  ...,  2.6688, -9.8738, -1.8776],
        [56.3054, 55.5287, 53.4466,  ...,  2.6661, -9.8637, -1.8757],
        [54.0733, 53.3286, 51.3275,  ...,  2.5609, -9.4735, -1.8005],
        ...,
   

tensor([[56.3638, 55.5863, 53.5023,  ...,  2.6688, -9.8738, -1.8776],
        [56.3054, 55.5287, 53.4466,  ...,  2.6661, -9.8637, -1.8757],
        [54.0733, 53.3286, 51.3275,  ...,  2.5609, -9.4735, -1.8005],
        ...,
        [55.2340, 54.4729, 52.4295,  ...,  2.6159, -9.6762, -1.8396],
        [51.2679, 50.5629, 48.6644,  ...,  2.4285, -8.9826, -1.7056],
        [56.0768, 55.3035, 53.2300,  ...,  2.6552, -9.8235, -1.8678]])
sigma:  tensor([[56.3638, 55.5863, 53.5023],
        [56.3054, 55.5287, 53.4466],
        [54.0733, 53.3286, 51.3275],
        [49.6316, 48.9506, 47.1111],
        [48.4473, 47.7832, 45.9863],
        [49.2031, 48.5274, 46.7035],
        [47.4863, 46.8360, 45.0746],
        [56.5796, 55.7989, 53.7071],
        [53.8568, 53.1155, 51.1227],
        [52.1077, 51.3914, 49.4620],
        [53.1254, 52.3936, 50.4280],
        [50.4991, 49.8064, 47.9347],
        [48.7986, 48.1297, 46.3198],
        [54.8903, 54.1343, 52.1038],
        [56.5414, 55.7613, 53.6708],
   

  File "/Users/weijiazeng/opt/anaconda3/envs/BPP/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Users/weijiazeng/opt/anaconda3/envs/BPP/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/Users/weijiazeng/opt/anaconda3/envs/BPP/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/weijiazeng/opt/anaconda3/envs/BPP/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance
    app.start()
  File "/Users/weijiazeng/opt/anaconda3/envs/BPP/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 677, in start
    self.io_loop.start()
  File "/Users/weijiazeng/opt/anaconda3/envs/BPP/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "/Users/weijiazeng/opt/anaconda3/envs/BPP/lib/python3.8/asyncio/base_events.py", line 570, in run_f

RuntimeError: Function 'MulBackward0' returned nan values in its 1th output.

In [None]:
%load_ext tensorboard
import tensorflow as tf
import numpy as np
import datetime
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing import event_accumulator

In [None]:
# Open the TensorBoard UI inline in the notebook, reading logs from the ./sac directory.
%tensorboard --logdir ./sac

# Experiments for batch operations

In [None]:
# Per-axis standard deviation for a single sample (shape (1, 3)).
sigma = torch.ones((1, 3))
# Draw one zero-mean Gaussian 3-vector whose per-component std is `sigma`.
omiga = torch.normal(mean=torch.zeros_like(sigma), std=sigma)
omiga

In [None]:
def transfer(omiga):
    """Return the skew-symmetric ("hat") matrix of a 3-vector.

    For ``omiga = (w0, w1, w2)`` the result is::

        [[  0, -w2,  w1],
         [ w2,   0, -w0],
         [-w1,  w0,   0]]

    Args:
        omiga: tensor (or array-like) whose last dimension holds the three
            components. A plain ``(3,)`` vector works as before; any leading
            dimensions are treated as batch dimensions.

    Returns:
        Tensor of shape ``(..., 3, 3)`` with the same dtype and device as
        ``omiga``.
    """
    omiga = torch.as_tensor(omiga)
    omiga_0, omiga_1, omiga_2 = omiga[..., 0], omiga[..., 1], omiga[..., 2]
    zero = torch.zeros_like(omiga_0)
    # Assemble with torch.stack rather than torch.tensor([...]): torch.tensor
    # copies the scalar values out of the graph, which drops gradients and
    # cannot be traced by vmap. Stacking keeps everything as tensor ops.
    omiga_hat = torch.stack(
        [
            torch.stack([zero, -omiga_2, omiga_1], dim=-1),
            torch.stack([omiga_2, zero, -omiga_0], dim=-1),
            torch.stack([-omiga_1, omiga_0, zero], dim=-1),
        ],
        dim=-2,
    )
    return omiga_hat

In [None]:
from functorch import vmap

# Vectorize `transfer` over the leading (batch) dimension of its input,
# so each row of `omiga` is passed to `transfer` as a single vector.
batch_transfer = vmap(transfer, in_dims=0, out_dims=0)
batch_transfer(omiga)

In [None]:
from liegroups.torch import SO3

# Push a batch of two 3-vectors through SO3.exp, show the batch shape,
# then map the resulting rotations back with SO3.log.
C = SO3.exp(torch.Tensor([[1, 2, 3], [0, 0, 0]]))
print(torch.Tensor([[1, 2, 3], [0, 0, 0]]).shape)
SO3.log(C)

In [None]:
# Sanity check: the natural log of 1 is 0.
np.log(1.0)

# Question to ask: the original Wahba problem uses actions of shape (4,), whereas in our case actions have shape (3, 3).