In [1]:
import numpy as np
import torch
from tsGaussian.torch_tsgaussian import TangentSpaceGaussian
from stable_baselines_utils import TangentSpaceGaussian as TSG
# from pytorch3d.transforms.so3 import (
#     so3_exp_map,
#     so3_relative_angle,
# )

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Instantiate the torch TangentSpaceGaussian used by the tests below.
# NOTE(review): the meaning of the single `None` constructor argument is not visible here — confirm.
tg = TangentSpaceGaussian(None)

# Test liegroup torch

In [3]:
from liegroups.torch import SO3

In [4]:
# Exponential map of two tangent vectors; the all-zero row maps to the
# identity rotation (see the second matrix in the output).
tangent_vecs = torch.Tensor([[1, 2, 3], [0, 0, 0]])
C = SO3.exp(tangent_vecs)
C

<liegroups.torch.so3.SO3Matrix>
| tensor([[[-0.6949,  0.7135,  0.0893],
|          [-0.1920, -0.3038,  0.9332],
|          [ 0.6930,  0.6313,  0.3481]],
| 
|         [[ 1.0000,  0.0000,  0.0000],
|          [ 0.0000,  1.0000,  0.0000],
|          [ 0.0000,  0.0000,  1.0000]]])

# Test torch_tsgaussian sample

In [5]:
# Batch of one: identity matrix as the mean, ones of shape (1, 3) as sigma.
R_mu = torch.eye(3).unsqueeze(0)
sigma = torch.ones(1, 3)

In [6]:
# Draw one sample around R_mu; rsample returns two values, unpacked here.
# NOTE(review): the names suggest a quaternion and a 3x3 rotation matrix — confirm against rsample.
R_quat, R_x = tg.rsample(R_mu, sigma)

sigma:  tensor([[1., 1., 1.]])
torch.Size([1, 3, 3])


In [7]:
# Orthogonality check: R^T R should be the identity up to float rounding.
R_x.transpose(1, 2) @ R_x

tensor([[[1.0000e+00, 2.9802e-08, 0.0000e+00],
         [2.9802e-08, 1.0000e+00, 2.9802e-08],
         [0.0000e+00, 2.9802e-08, 1.0000e+00]]])

# Test torch_tsgaussian normal_term

In [8]:
# Sigma of ones with shape (1, 3) for the normal_term check.
sigma = torch.ones(1, 3)
sigma

tensor([[1., 1., 1.]])

In [9]:
# Evaluate normal_term for sigma = ones. The recorded value 15.7496 equals
# (2 * pi) ** 1.5. NOTE(review): presumably the Gaussian normalising constant — confirm.
tg.normal_term(sigma)

tensor([15.7496])

# Test torch_tsgaussian log_map

In [10]:
# Two identical identity rotations, each with a leading batch axis of 1.
R_1 = torch.eye(3).unsqueeze(0)
R_2 = torch.eye(3).unsqueeze(0)

In [11]:
# Log map between two identity rotations; the recorded result is the zero vector.
# NOTE(review): the recorded output has shape (3,) although the inputs are
# batched (1, 3, 3) — check how log_map handles the batch axis.
tg.log_map(R_1, R_2)

tensor([0., 0., 0.])

# Test torch_tsgaussian log_probs

In [12]:
# Inputs for the log_probs check: a batch of five identical rotations.
R_x = torch.eye(3).reshape((1,3,3))
# The mean must itself be a rotation matrix. An all-zero matrix is not in
# SO(3) (R^T R != I), so use the identity, as the sampling test does.
R_mu = torch.eye(3).reshape((1,3,3))
R_x = R_x.repeat(5, 1, 1)
R_mu = R_mu.repeat(5, 1, 1)
# NOTE(review): sigma keeps batch size 1 while R_x / R_mu have batch size 5 —
# confirm that log_probs broadcasts sigma.
sigma = torch.ones(3).reshape((1,3))

In [13]:
# tg.log_probs(R_x, R_mu, sigma)

In [14]:
# Convert a log-density of -2.7568 back to a density. The result (~0.0635)
# equals 1 / 15.7496, the reciprocal of the normal_term(sigma) value recorded earlier.
# NOTE(review): -2.7568 is presumably the output of the log_probs call — confirm.
np.e ** (-2.7568)

0.06349462641817973

All code for torch_tsgaussian now runs; its correctness still needs to be checked, and it needs to be turned into a batched version.

# Test TangentSpaceGaussian actions_from_params

In [15]:
# Instantiate the stable_baselines_utils wrapper (imported as TSG).
# NOTE(review): the meaning of the `None` constructor argument is not visible here — confirm.
tsg = TSG(None)

In [16]:
# Show the object held in tsg.distribution; the recorded output is a
# tsGaussian.torch_tsgaussian.TangentSpaceGaussian instance.
print(tsg.distribution)

<tsGaussian.torch_tsgaussian.TangentSpaceGaussian object at 0x7fb5f410d100>


In [17]:
# Show the wrapper itself: it is the stable_baselines_utils class, a different
# object from the distribution it holds.
tsg

<stable_baselines_utils.TangentSpaceGaussian at 0x7fb5f410d0d0>

In [18]:
# Sample actions around the identity rotation with sigma of ones; the call
# returns a pair of tensors (shapes (1, 4) and (1, 3, 3) in the recorded output).
mean_rotation = torch.eye(3).unsqueeze(0)
scale = torch.ones(1, 3)
tsg.actions_from_params(mean_rotation, scale)

sigma:  tensor([[1., 1., 1.]])
torch.Size([1, 3, 3])


(tensor([[-0.2309, -0.1942,  0.2587,  0.9176]]),
 tensor([[[ 0.7908, -0.3850, -0.4759],
          [ 0.5644,  0.7595,  0.3233],
          [ 0.2370, -0.5243,  0.8179]]]))

# Test TangentSpaceGaussian log_prob_from_params

In [19]:
# Shape check: tiling a 3x3 identity twice along a new leading axis.
torch.eye(3).repeat(2, 1, 1).shape

torch.Size([2, 3, 3])

In [20]:
# Shape check: tiling a length-3 vector of ones into two rows.
torch.ones(3).repeat(2, 1).shape

torch.Size([2, 3])

In [21]:
# tsg.log_prob_from_params(torch.eye(3).repeat(2,1,1), torch.ones(3))

In [22]:
# Batched inverse of two random 4x4 matrices (inputs are unseeded, so the
# values differ between runs).
x = torch.randn(2, 4, 4)
y = torch.inverse(x)
y

tensor([[[ 0.8972,  0.3311, -2.0802,  0.1954],
         [-0.4544,  0.0150, -0.1455,  0.1318],
         [-0.1425,  0.6359, -1.6084, -1.3160],
         [ 0.6918, -0.0042,  3.0271,  1.8034]],

        [[-0.2175, -0.6450, -0.9562,  0.3594],
         [ 2.2492,  0.1025,  2.0803, -0.1886],
         [ 1.8147, -0.1330,  2.0608, -0.5000],
         [-1.9430, -0.4256, -1.6181,  0.7988]]])

Again, the code runs, but its correctness still needs to be checked.

# Try to run training

In [23]:
import torch
from absl import app, flags
from stable_baselines3 import SAC, PPO
from envs.wahba import Wahba
from stable_baselines_utils import CustomSACPolicy, \
    CustomCNN

In [24]:
def main(argv):
    """Run a short SAC training session on the Wahba environment (CPU only).

    Args:
        argv: accepted so the function can serve as an entry point; unused.
    """
    env = Wahba()
    # All policy options gathered in one literal: custom CNN feature extractor
    # with 256 features, a single critic, and no extractor sharing.
    policy_kwargs = {
        'features_extractor_class': CustomCNN,
        'features_extractor_kwargs': {'features_dim': 256},
        'n_critics': 1,
        'share_features_extractor': False,
    }
    model = SAC(
        CustomSACPolicy,
        env,
        verbose=1,
        ent_coef='auto_0.1',
        policy_kwargs=policy_kwargs,
        device=torch.device('cpu'),
    )
    model.learn(total_timesteps=500, eval_freq=100, n_eval_episodes=100)

In [25]:
from torch import autograd
# Run the training smoke test under autograd anomaly detection, so a backward
# pass that produces NaN raises with a traceback of the forward operation.
# This mode adds overhead and is meant for debugging only.
with autograd.detect_anomaly():
    main(None)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -4.72    |
| time/              |          |
|    episodes        | 4        |
|    fps             | 703      |
|    time_elapsed    | 0        |
|    total_timesteps | 4        |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.04    |
| time/              |          |
|    episodes        | 8        |
|    fps             | 637      |
|    time_elapsed    | 0        |
|    total_timesteps | 8        |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -5.74    |
| time/              |          |
|    episodes        | 12       |
|    fps             |

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -5.89    |
| time/              |          |
|    episodes        | 100      |
|    fps             | 701      |
|    time_elapsed    | 0        |
|    total_timesteps | 100      |
---------------------------------


  with autograd.detect_anomaly():


tensor([[-0.0044,  0.0345,  0.0262,  0.0098,  0.0355, -0.0051, -0.0545,  0.0045,
         -0.0503,  0.0180,  0.0259,  0.0383]])
sigma:  tensor([[0.0044, 0.0345, 0.0262]])
torch.Size([1, 3, 3])
tensor([[-0.0042,  0.0349,  0.0265,  ...,  0.0180,  0.0259,  0.0381],
        [-0.0044,  0.0345,  0.0261,  ...,  0.0180,  0.0261,  0.0383],
        [-0.0043,  0.0348,  0.0262,  ...,  0.0181,  0.0260,  0.0380],
        ...,
        [-0.0042,  0.0348,  0.0262,  ...,  0.0181,  0.0260,  0.0380],
        [-0.0043,  0.0347,  0.0263,  ...,  0.0181,  0.0258,  0.0383],
        [-0.0042,  0.0346,  0.0260,  ...,  0.0179,  0.0264,  0.0384]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0042, 0.0349, 0.0265],
        [0.0044, 0.0345, 0.0261],
        [0.0043, 0.0348, 0.0262],
        [0.0043, 0.0347, 0.0263],
        [0.0046, 0.0348, 0.0260],
        [0.0043, 0.0348, 0.0262],
        [0.0045, 0.0348, 0.0262],
        [0.0045, 0.0348, 0.0264],
        [0.0043, 0.0349, 0.0262],
        [0.0043, 0.0348, 0.

        [0.0042, 0.0346, 0.0260]], grad_fn=<AbsBackward>)
torch.Size([256, 3, 3])
actions:  tensor([[ 0.1181, -0.3068, -0.5625,  0.7587],
        [ 0.1294, -0.2949, -0.5762,  0.7512],
        [ 0.1176, -0.3134, -0.5676,  0.7521],
        ...,
        [ 0.1241, -0.3005, -0.5645,  0.7587],
        [ 0.1277, -0.2942, -0.5821,  0.7472],
        [ 0.1359, -0.2891, -0.5835,  0.7466]])
actions_mat:  tensor([[[ 0.1790,  0.7810, -0.5984],
         [-0.9259,  0.3394,  0.1659],
         [ 0.3326,  0.5244,  0.7839]],

        [[ 0.1620,  0.7894, -0.5921],
         [-0.9420,  0.3024,  0.1454],
         [ 0.2939,  0.5342,  0.7926]],

        [[ 0.1591,  0.7801, -0.6051],
         [-0.9276,  0.3279,  0.1789],
         [ 0.3380,  0.5328,  0.7758]],

        ...,

        [[ 0.1820,  0.7821, -0.5960],
         [-0.9312,  0.3318,  0.1510],
         [ 0.3159,  0.5275,  0.7886]],

        [[ 0.1493,  0.7948, -0.5883],
         [-0.9450,  0.2897,  0.1516],
         [ 0.2909,  0.5333,  0.7943]],

        [[

tensor([[-0.0044,  0.0345,  0.0260,  0.0097,  0.0360, -0.0051, -0.0544,  0.0042,
         -0.0500,  0.0178,  0.0262,  0.0384]])
sigma:  tensor([[0.0044, 0.0345, 0.0260]])
torch.Size([1, 3, 3])
tensor([[-0.0047,  0.0347,  0.0262,  ...,  0.0180,  0.0262,  0.0385],
        [-0.0043,  0.0345,  0.0264,  ...,  0.0181,  0.0261,  0.0381],
        [-0.0044,  0.0348,  0.0262,  ...,  0.0179,  0.0258,  0.0381],
        ...,
        [-0.0044,  0.0345,  0.0262,  ...,  0.0180,  0.0259,  0.0383],
        [-0.0043,  0.0347,  0.0263,  ...,  0.0181,  0.0260,  0.0381],
        [-0.0047,  0.0345,  0.0263,  ...,  0.0181,  0.0261,  0.0385]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0047, 0.0347, 0.0262],
        [0.0043, 0.0345, 0.0264],
        [0.0044, 0.0348, 0.0262],
        [0.0048, 0.0347, 0.0266],
        [0.0043, 0.0347, 0.0259],
        [0.0045, 0.0348, 0.0264],
        [0.0044, 0.0348, 0.0261],
        [0.0044, 0.0345, 0.0261],
        [0.0041, 0.0346, 0.0265],
        [0.0042, 0.0348, 0.

tensor([[-0.0041,  0.0346,  0.0262,  0.0099,  0.0360, -0.0048, -0.0543,  0.0042,
         -0.0495,  0.0179,  0.0264,  0.0383]])
sigma:  tensor([[0.0041, 0.0346, 0.0262]])
torch.Size([1, 3, 3])
tensor([[-0.0043,  0.0347,  0.0263,  ...,  0.0182,  0.0260,  0.0380],
        [-0.0043,  0.0349,  0.0262,  ...,  0.0182,  0.0260,  0.0380],
        [-0.0045,  0.0347,  0.0260,  ...,  0.0181,  0.0261,  0.0382],
        ...,
        [-0.0043,  0.0346,  0.0263,  ...,  0.0179,  0.0262,  0.0383],
        [-0.0044,  0.0347,  0.0262,  ...,  0.0180,  0.0259,  0.0382],
        [-0.0043,  0.0344,  0.0261,  ...,  0.0180,  0.0263,  0.0384]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0043, 0.0347, 0.0263],
        [0.0043, 0.0349, 0.0262],
        [0.0045, 0.0347, 0.0260],
        [0.0047, 0.0346, 0.0259],
        [0.0043, 0.0348, 0.0262],
        [0.0042, 0.0344, 0.0264],
        [0.0043, 0.0347, 0.0263],
        [0.0045, 0.0345, 0.0258],
        [0.0045, 0.0345, 0.0258],
        [0.0043, 0.0344, 0.

tensor([[-0.0043,  0.0347,  0.0263,  0.0098,  0.0357, -0.0051, -0.0549,  0.0044,
         -0.0501,  0.0182,  0.0261,  0.0380]])
sigma:  tensor([[0.0043, 0.0347, 0.0263]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -5.88    |
| time/              |          |
|    episodes        | 104      |
|    fps             | 37       |
|    time_elapsed    | 2        |
|    total_timesteps | 104      |
| train/             |          |
|    actor_loss      | 0.0259   |
|    critic_loss     | 18.4     |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 3        |
---------------------------------
tensor([[-0.0046,  0.0346,  0.0263,  ...,  0.0182,  0.0261,  0.0383],
        [-0.0041,  0.0346,  0.0265,  ...,  0.0182,  0.0260,  0.0384],
        [-0.0042,  0.0347,  0.0264,  ...,  0.0181,  0.0259,  0.0379],
        ...,
        [-0.0043,  0.0349,  0.0262,  ...

tensor([[-0.0046,  0.0346,  0.0261,  0.0098,  0.0359, -0.0051, -0.0547,  0.0043,
         -0.0504,  0.0180,  0.0258,  0.0383]])
sigma:  tensor([[0.0046, 0.0346, 0.0261]])
torch.Size([1, 3, 3])
tensor([[-0.0048,  0.0346,  0.0263,  ...,  0.0180,  0.0256,  0.0385],
        [-0.0041,  0.0347,  0.0264,  ...,  0.0181,  0.0263,  0.0384],
        [-0.0043,  0.0347,  0.0264,  ...,  0.0180,  0.0258,  0.0382],
        ...,
        [-0.0043,  0.0346,  0.0263,  ...,  0.0182,  0.0260,  0.0380],
        [-0.0041,  0.0346,  0.0264,  ...,  0.0180,  0.0264,  0.0383],
        [-0.0043,  0.0348,  0.0262,  ...,  0.0181,  0.0260,  0.0379]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0048, 0.0346, 0.0263],
        [0.0041, 0.0347, 0.0264],
        [0.0043, 0.0347, 0.0264],
        [0.0043, 0.0346, 0.0263],
        [0.0043, 0.0349, 0.0262],
        [0.0046, 0.0346, 0.0261],
        [0.0043, 0.0345, 0.0264],
        [0.0049, 0.0347, 0.0263],
        [0.0045, 0.0345, 0.0258],
        [0.0044, 0.0347, 0.

tensor([[-0.0042,  0.0345,  0.0264,  0.0098,  0.0356, -0.0050, -0.0550,  0.0044,
         -0.0502,  0.0181,  0.0261,  0.0382]])
sigma:  tensor([[0.0042, 0.0345, 0.0264]])
torch.Size([1, 3, 3])
tensor([[-0.0044,  0.0349,  0.0263,  ...,  0.0182,  0.0260,  0.0379],
        [-0.0042,  0.0348,  0.0263,  ...,  0.0181,  0.0260,  0.0381],
        [-0.0046,  0.0346,  0.0261,  ...,  0.0180,  0.0258,  0.0383],
        ...,
        [-0.0046,  0.0346,  0.0263,  ...,  0.0182,  0.0260,  0.0382],
        [-0.0048,  0.0347,  0.0266,  ...,  0.0181,  0.0259,  0.0387],
        [-0.0045,  0.0347,  0.0260,  ...,  0.0181,  0.0261,  0.0382]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0044, 0.0349, 0.0263],
        [0.0042, 0.0348, 0.0263],
        [0.0046, 0.0346, 0.0261],
        [0.0043, 0.0347, 0.0263],
        [0.0042, 0.0349, 0.0263],
        [0.0043, 0.0347, 0.0264],
        [0.0047, 0.0346, 0.0259],
        [0.0049, 0.0347, 0.0263],
        [0.0044, 0.0347, 0.0262],
        [0.0046, 0.0346, 0.

tensor([[-0.0044,  0.0345,  0.0260,  0.0097,  0.0357, -0.0051, -0.0545,  0.0043,
         -0.0502,  0.0178,  0.0262,  0.0384]])
sigma:  tensor([[0.0044, 0.0345, 0.0260]])
torch.Size([1, 3, 3])
tensor([[-0.0047,  0.0347,  0.0263,  ...,  0.0179,  0.0256,  0.0385],
        [-0.0044,  0.0345,  0.0259,  ...,  0.0177,  0.0261,  0.0383],
        [-0.0045,  0.0348,  0.0264,  ...,  0.0180,  0.0262,  0.0383],
        ...,
        [-0.0043,  0.0347,  0.0262,  ...,  0.0182,  0.0261,  0.0379],
        [-0.0047,  0.0347,  0.0263,  ...,  0.0179,  0.0256,  0.0385],
        [-0.0048,  0.0345,  0.0263,  ...,  0.0179,  0.0262,  0.0386]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0047, 0.0347, 0.0263],
        [0.0044, 0.0345, 0.0259],
        [0.0045, 0.0348, 0.0264],
        [0.0043, 0.0347, 0.0263],
        [0.0041, 0.0344, 0.0263],
        [0.0043, 0.0347, 0.0264],
        [0.0046, 0.0346, 0.0261],
        [0.0045, 0.0347, 0.0263],
        [0.0044, 0.0345, 0.0260],
        [0.0042, 0.0347, 0.

tensor([[-0.0047,  0.0347,  0.0263,  ...,  0.0179,  0.0256,  0.0385],
        [-0.0044,  0.0345,  0.0259,  ...,  0.0177,  0.0261,  0.0383],
        [-0.0045,  0.0348,  0.0264,  ...,  0.0180,  0.0262,  0.0383],
        ...,
        [-0.0043,  0.0347,  0.0262,  ...,  0.0182,  0.0261,  0.0379],
        [-0.0047,  0.0347,  0.0263,  ...,  0.0179,  0.0256,  0.0385],
        [-0.0048,  0.0345,  0.0263,  ...,  0.0179,  0.0262,  0.0386]])
sigma:  tensor([[0.0047, 0.0347, 0.0263],
        [0.0044, 0.0345, 0.0259],
        [0.0045, 0.0348, 0.0264],
        [0.0043, 0.0347, 0.0263],
        [0.0041, 0.0344, 0.0263],
        [0.0043, 0.0347, 0.0264],
        [0.0046, 0.0346, 0.0261],
        [0.0045, 0.0347, 0.0263],
        [0.0044, 0.0345, 0.0260],
        [0.0042, 0.0347, 0.0264],
        [0.0043, 0.0344, 0.0258],
        [0.0045, 0.0345, 0.0265],
        [0.0045, 0.0347, 0.0260],
        [0.0049, 0.0347, 0.0263],
        [0.0044, 0.0349, 0.0263],
        [0.0049, 0.0347, 0.0263],
        [0.004

tensor([[-0.0042,  0.0345,  0.0264,  0.0099,  0.0357, -0.0050, -0.0549,  0.0046,
         -0.0502,  0.0181,  0.0260,  0.0383]])
sigma:  tensor([[0.0042, 0.0345, 0.0264]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -5.76    |
| time/              |          |
|    episodes        | 108      |
|    fps             | 17       |
|    time_elapsed    | 6        |
|    total_timesteps | 108      |
| train/             |          |
|    actor_loss      | 0.192    |
|    critic_loss     | 17.2     |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 7        |
---------------------------------
tensor([[-0.0043,  0.0347,  0.0263,  ...,  0.0181,  0.0260,  0.0381],
        [-0.0048,  0.0345,  0.0263,  ...,  0.0179,  0.0262,  0.0386],
        [-0.0047,  0.0347,  0.0262,  ...,  0.0180,  0.0262,  0.0385],
        ...,
        [-0.0044,  0.0348,  0.0261,  ...

tensor([[-0.0043,  0.0347,  0.0263,  ...,  0.0181,  0.0260,  0.0381],
        [-0.0048,  0.0345,  0.0263,  ...,  0.0179,  0.0262,  0.0386],
        [-0.0047,  0.0347,  0.0262,  ...,  0.0180,  0.0262,  0.0385],
        ...,
        [-0.0044,  0.0348,  0.0261,  ...,  0.0181,  0.0261,  0.0379],
        [-0.0044,  0.0349,  0.0261,  ...,  0.0180,  0.0260,  0.0378],
        [-0.0049,  0.0347,  0.0263,  ...,  0.0179,  0.0256,  0.0387]])
sigma:  tensor([[0.0043, 0.0347, 0.0263],
        [0.0048, 0.0345, 0.0263],
        [0.0047, 0.0347, 0.0262],
        [0.0045, 0.0347, 0.0263],
        [0.0047, 0.0345, 0.0263],
        [0.0045, 0.0346, 0.0260],
        [0.0042, 0.0345, 0.0264],
        [0.0045, 0.0347, 0.0261],
        [0.0043, 0.0348, 0.0262],
        [0.0047, 0.0345, 0.0263],
        [0.0042, 0.0345, 0.0263],
        [0.0043, 0.0344, 0.0261],
        [0.0042, 0.0346, 0.0262],
        [0.0043, 0.0348, 0.0262],
        [0.0043, 0.0349, 0.0262],
        [0.0043, 0.0346, 0.0261],
        [0.004

tensor([[-0.0043,  0.0345,  0.0259,  0.0098,  0.0358, -0.0051, -0.0544,  0.0042,
         -0.0502,  0.0177,  0.0265,  0.0385]])
sigma:  tensor([[0.0043, 0.0345, 0.0259]])
torch.Size([1, 3, 3])
tensor([[-0.0042,  0.0345,  0.0263,  ...,  0.0180,  0.0262,  0.0384],
        [-0.0044,  0.0343,  0.0261,  ...,  0.0180,  0.0265,  0.0386],
        [-0.0041,  0.0346,  0.0262,  ...,  0.0179,  0.0264,  0.0382],
        ...,
        [-0.0042,  0.0348,  0.0263,  ...,  0.0181,  0.0260,  0.0381],
        [-0.0042,  0.0346,  0.0260,  ...,  0.0179,  0.0264,  0.0384],
        [-0.0046,  0.0346,  0.0263,  ...,  0.0182,  0.0261,  0.0383]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0042, 0.0345, 0.0263],
        [0.0044, 0.0343, 0.0261],
        [0.0041, 0.0346, 0.0262],
        [0.0043, 0.0347, 0.0263],
        [0.0043, 0.0347, 0.0263],
        [0.0042, 0.0349, 0.0265],
        [0.0047, 0.0346, 0.0259],
        [0.0045, 0.0345, 0.0265],
        [0.0041, 0.0346, 0.0262],
        [0.0045, 0.0345, 0.

tensor([[-0.0049,  0.0347,  0.0263,  0.0098,  0.0356, -0.0051, -0.0544,  0.0044,
         -0.0503,  0.0179,  0.0256,  0.0388]])
sigma:  tensor([[0.0049, 0.0347, 0.0263]])
torch.Size([1, 3, 3])
tensor([[-0.0044,  0.0347,  0.0262,  ...,  0.0181,  0.0261,  0.0381],
        [-0.0042,  0.0349,  0.0263,  ...,  0.0179,  0.0258,  0.0382],
        [-0.0046,  0.0346,  0.0261,  ...,  0.0180,  0.0258,  0.0383],
        ...,
        [-0.0047,  0.0347,  0.0262,  ...,  0.0180,  0.0262,  0.0385],
        [-0.0043,  0.0348,  0.0262,  ...,  0.0182,  0.0260,  0.0379],
        [-0.0041,  0.0344,  0.0263,  ...,  0.0181,  0.0263,  0.0385]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0044, 0.0347, 0.0262],
        [0.0042, 0.0349, 0.0263],
        [0.0046, 0.0346, 0.0261],
        [0.0043, 0.0347, 0.0263],
        [0.0046, 0.0346, 0.0263],
        [0.0045, 0.0347, 0.0259],
        [0.0045, 0.0348, 0.0262],
        [0.0042, 0.0348, 0.0262],
        [0.0043, 0.0348, 0.0262],
        [0.0043, 0.0347, 0.

tensor([[-0.0043,  0.0347,  0.0261,  0.0096,  0.0357, -0.0051, -0.0550,  0.0042,
         -0.0500,  0.0180,  0.0261,  0.0381]])
sigma:  tensor([[0.0043, 0.0347, 0.0261]])
torch.Size([1, 3, 3])
tensor([[-0.0044,  0.0348,  0.0262,  ...,  0.0180,  0.0261,  0.0380],
        [-0.0042,  0.0344,  0.0264,  ...,  0.0181,  0.0260,  0.0382],
        [-0.0041,  0.0344,  0.0263,  ...,  0.0181,  0.0263,  0.0385],
        ...,
        [-0.0044,  0.0345,  0.0260,  ...,  0.0178,  0.0262,  0.0384],
        [-0.0044,  0.0348,  0.0260,  ...,  0.0181,  0.0261,  0.0380],
        [-0.0049,  0.0347,  0.0263,  ...,  0.0180,  0.0257,  0.0388]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0044, 0.0348, 0.0262],
        [0.0042, 0.0344, 0.0264],
        [0.0041, 0.0344, 0.0263],
        [0.0049, 0.0347, 0.0263],
        [0.0042, 0.0350, 0.0265],
        [0.0048, 0.0346, 0.0263],
        [0.0044, 0.0345, 0.0261],
        [0.0044, 0.0348, 0.0262],
        [0.0043, 0.0348, 0.0262],
        [0.0043, 0.0349, 0.

tensor([[-0.0042,  0.0346,  0.0263,  0.0096,  0.0356, -0.0050, -0.0545,  0.0047,
         -0.0501,  0.0180,  0.0264,  0.0383]])
sigma:  tensor([[0.0042, 0.0346, 0.0263]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -5.76    |
| time/              |          |
|    episodes        | 112      |
|    fps             | 12       |
|    time_elapsed    | 9        |
|    total_timesteps | 112      |
| train/             |          |
|    actor_loss      | 0.65     |
|    critic_loss     | 15.8     |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 11       |
---------------------------------
tensor([[-0.0045,  0.0345,  0.0265,  ...,  0.0181,  0.0261,  0.0385],
        [-0.0043,  0.0347,  0.0260,  ...,  0.0180,  0.0261,  0.0381],
        [-0.0044,  0.0345,  0.0261,  ...,  0.0179,  0.0261,  0.0384],
        ...,
        [-0.0044,  0.0345,  0.0261,  ...

tensor([[-0.0042,  0.0348,  0.0263,  0.0098,  0.0357, -0.0050, -0.0549,  0.0042,
         -0.0500,  0.0181,  0.0260,  0.0380]])
sigma:  tensor([[0.0042, 0.0348, 0.0263]])
torch.Size([1, 3, 3])
tensor([[-0.0047,  0.0346,  0.0259,  ...,  0.0177,  0.0262,  0.0384],
        [-0.0047,  0.0347,  0.0261,  ...,  0.0180,  0.0257,  0.0386],
        [-0.0045,  0.0348,  0.0264,  ...,  0.0180,  0.0262,  0.0383],
        ...,
        [-0.0043,  0.0347,  0.0263,  ...,  0.0182,  0.0261,  0.0380],
        [-0.0045,  0.0347,  0.0260,  ...,  0.0181,  0.0261,  0.0382],
        [-0.0041,  0.0346,  0.0265,  ...,  0.0182,  0.0260,  0.0384]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0047, 0.0346, 0.0259],
        [0.0047, 0.0347, 0.0261],
        [0.0045, 0.0348, 0.0264],
        [0.0042, 0.0348, 0.0263],
        [0.0042, 0.0345, 0.0263],
        [0.0044, 0.0345, 0.0261],
        [0.0043, 0.0349, 0.0262],
        [0.0044, 0.0347, 0.0262],
        [0.0042, 0.0346, 0.0263],
        [0.0042, 0.0345, 0.

tensor([[-0.0043,  0.0348,  0.0263,  0.0098,  0.0357, -0.0051, -0.0549,  0.0042,
         -0.0500,  0.0182,  0.0261,  0.0380]])
sigma:  tensor([[0.0043, 0.0348, 0.0263]])
torch.Size([1, 3, 3])
tensor([[-0.0044,  0.0348,  0.0261,  ...,  0.0181,  0.0261,  0.0379],
        [-0.0043,  0.0347,  0.0263,  ...,  0.0181,  0.0260,  0.0381],
        [-0.0044,  0.0348,  0.0261,  ...,  0.0181,  0.0261,  0.0379],
        ...,
        [-0.0043,  0.0347,  0.0264,  ...,  0.0180,  0.0258,  0.0382],
        [-0.0042,  0.0346,  0.0262,  ...,  0.0179,  0.0263,  0.0383],
        [-0.0043,  0.0349,  0.0262,  ...,  0.0180,  0.0260,  0.0379]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0044, 0.0348, 0.0261],
        [0.0043, 0.0347, 0.0263],
        [0.0044, 0.0348, 0.0261],
        [0.0044, 0.0348, 0.0262],
        [0.0043, 0.0345, 0.0259],
        [0.0041, 0.0346, 0.0265],
        [0.0045, 0.0348, 0.0261],
        [0.0044, 0.0348, 0.0262],
        [0.0045, 0.0347, 0.0263],
        [0.0042, 0.0347, 0.

tensor([[-0.0041,  0.0348,  0.0264,  0.0100,  0.0357, -0.0048, -0.0543,  0.0041,
         -0.0495,  0.0179,  0.0260,  0.0382]])
sigma:  tensor([[0.0041, 0.0348, 0.0264]])
torch.Size([1, 3, 3])
tensor([[-0.0042,  0.0346,  0.0262,  ...,  0.0179,  0.0263,  0.0383],
        [-0.0042,  0.0349,  0.0262,  ...,  0.0181,  0.0260,  0.0379],
        [-0.0043,  0.0347,  0.0263,  ...,  0.0182,  0.0261,  0.0380],
        ...,
        [-0.0044,  0.0345,  0.0261,  ...,  0.0180,  0.0261,  0.0383],
        [-0.0043,  0.0345,  0.0259,  ...,  0.0177,  0.0265,  0.0385],
        [-0.0046,  0.0346,  0.0261,  ...,  0.0179,  0.0258,  0.0382]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0042, 0.0346, 0.0262],
        [0.0042, 0.0349, 0.0262],
        [0.0043, 0.0347, 0.0263],
        [0.0043, 0.0347, 0.0263],
        [0.0047, 0.0347, 0.0262],
        [0.0044, 0.0345, 0.0261],
        [0.0042, 0.0349, 0.0262],
        [0.0041, 0.0346, 0.0264],
        [0.0045, 0.0345, 0.0258],
        [0.0043, 0.0348, 0.

tensor([[-0.0040,  0.0349,  0.0264,  0.0101,  0.0357, -0.0047, -0.0544,  0.0041,
         -0.0495,  0.0180,  0.0259,  0.0382]])
sigma:  tensor([[0.0040, 0.0349, 0.0264]])
torch.Size([1, 3, 3])
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -5.84    |
| time/              |          |
|    episodes        | 116      |
|    fps             | 9        |
|    time_elapsed    | 12       |
|    total_timesteps | 116      |
| train/             |          |
|    actor_loss      | 1.99     |
|    critic_loss     | 11.2     |
|    ent_coef        | 0        |
|    learning_rate   | 0.0003   |
|    n_updates       | 15       |
---------------------------------
tensor([[-0.0047,  0.0347,  0.0261,  ...,  0.0180,  0.0257,  0.0386],
        [-0.0042,  0.0345,  0.0264,  ...,  0.0181,  0.0261,  0.0381],
        [-0.0044,  0.0347,  0.0262,  ...,  0.0182,  0.0261,  0.0381],
        ...,
        [-0.0042,  0.0345,  0.0264,  ...

tensor([[-0.0044,  0.0345,  0.0264,  0.0099,  0.0357, -0.0050, -0.0547,  0.0045,
         -0.0501,  0.0182,  0.0262,  0.0383]])
sigma:  tensor([[0.0044, 0.0345, 0.0264]])
torch.Size([1, 3, 3])
tensor([[-0.0048,  0.0345,  0.0263,  ...,  0.0179,  0.0262,  0.0386],
        [-0.0049,  0.0347,  0.0263,  ...,  0.0180,  0.0257,  0.0388],
        [-0.0044,  0.0349,  0.0263,  ...,  0.0182,  0.0260,  0.0379],
        ...,
        [-0.0045,  0.0345,  0.0265,  ...,  0.0181,  0.0261,  0.0385],
        [-0.0042,  0.0345,  0.0264,  ...,  0.0181,  0.0260,  0.0383],
        [-0.0042,  0.0348,  0.0263,  ...,  0.0181,  0.0260,  0.0380]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0048, 0.0345, 0.0263],
        [0.0049, 0.0347, 0.0263],
        [0.0044, 0.0349, 0.0263],
        [0.0043, 0.0346, 0.0259],
        [0.0043, 0.0346, 0.0259],
        [0.0043, 0.0348, 0.0262],
        [0.0044, 0.0348, 0.0262],
        [0.0043, 0.0347, 0.0263],
        [0.0045, 0.0345, 0.0265],
        [0.0044, 0.0347, 0.

tensor([[-0.0047,  0.0346,  0.0261,  0.0099,  0.0356, -0.0051, -0.0546,  0.0044,
         -0.0504,  0.0179,  0.0256,  0.0383]])
sigma:  tensor([[0.0047, 0.0346, 0.0261]])
torch.Size([1, 3, 3])
tensor([[-0.0044,  0.0347,  0.0262,  ...,  0.0179,  0.0261,  0.0382],
        [-0.0044,  0.0348,  0.0260,  ...,  0.0181,  0.0261,  0.0380],
        [-0.0044,  0.0348,  0.0263,  ...,  0.0182,  0.0261,  0.0380],
        ...,
        [-0.0042,  0.0349,  0.0263,  ...,  0.0179,  0.0258,  0.0382],
        [-0.0045,  0.0348,  0.0261,  ...,  0.0180,  0.0262,  0.0380],
        [-0.0045,  0.0347,  0.0261,  ...,  0.0182,  0.0261,  0.0381]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0044, 0.0347, 0.0262],
        [0.0044, 0.0348, 0.0260],
        [0.0044, 0.0348, 0.0263],
        [0.0043, 0.0347, 0.0263],
        [0.0043, 0.0347, 0.0263],
        [0.0042, 0.0348, 0.0263],
        [0.0048, 0.0346, 0.0265],
        [0.0045, 0.0348, 0.0262],
        [0.0045, 0.0348, 0.0262],
        [0.0045, 0.0345, 0.

tensor([[-0.0045,  0.0348,  0.0263,  0.0098,  0.0355, -0.0051, -0.0546,  0.0044,
         -0.0499,  0.0180,  0.0261,  0.0381]])
sigma:  tensor([[0.0045, 0.0348, 0.0263]])
torch.Size([1, 3, 3])
tensor([[-0.0040,  0.0349,  0.0264,  ...,  0.0180,  0.0259,  0.0382],
        [-0.0041,  0.0344,  0.0263,  ...,  0.0181,  0.0263,  0.0385],
        [-0.0049,  0.0347,  0.0263,  ...,  0.0179,  0.0256,  0.0387],
        ...,
        [-0.0044,  0.0345,  0.0261,  ...,  0.0179,  0.0261,  0.0384],
        [-0.0044,  0.0347,  0.0262,  ...,  0.0181,  0.0261,  0.0381],
        [-0.0047,  0.0347,  0.0262,  ...,  0.0180,  0.0262,  0.0385]],
       grad_fn=<AddmmBackward>)
sigma:  tensor([[0.0040, 0.0349, 0.0264],
        [0.0041, 0.0344, 0.0263],
        [0.0049, 0.0347, 0.0263],
        [0.0043, 0.0347, 0.0263],
        [0.0042, 0.0350, 0.0265],
        [0.0048, 0.0346, 0.0265],
        [0.0044, 0.0345, 0.0264],
        [0.0043, 0.0348, 0.0262],
        [0.0043, 0.0346, 0.0262],
        [0.0044, 0.0348, 0.

tensor([[-0.0042,  0.0349,  0.0264,  0.0100,  0.0356, -0.0049, -0.0548,  0.0041,
         -0.0498,  0.0181,  0.0259,  0.0379]])
sigma:  tensor([[0.0042, 0.0349, 0.0264]])
torch.Size([1, 3, 3])


KeyboardInterrupt: 

In [None]:
%load_ext tensorboard
import tensorflow as tf
import numpy as np
import datetime
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing import event_accumulator

In [None]:
%tensorboard --logdir ./sac

# Experiments for batch operations

In [None]:
# Draw one zero-mean normal sample of shape (1, 3), using sigma as the
# per-element standard deviation.
sigma = torch.ones((1, 3))
omiga = torch.normal(mean=torch.zeros_like(sigma), std=sigma)
omiga

In [None]:
def transfer(omiga):
    """Return the 3x3 skew-symmetric ("hat") matrix of a 3-vector.

    The result satisfies ``transfer(w) @ v == torch.cross(w, v)``.

    Args:
        omiga: tensor indexed along its first axis at positions 0, 1 and 2.

    Returns:
        Tensor of shape (3, 3) for a 1-D input, with the same dtype and
        device as ``omiga`` and still connected to its autograd graph.
    """
    omiga_0, omiga_1, omiga_2 = omiga[0], omiga[1], omiga[2]
    zero = torch.zeros_like(omiga_0)
    # Assemble with torch.stack rather than torch.tensor([...]): torch.tensor
    # copies the element values into a fresh leaf tensor, which cuts the
    # autograd graph and is not supported for batched elements under vmap.
    omiga_hat = torch.stack([
        torch.stack([zero, -omiga_2, omiga_1]),
        torch.stack([omiga_2, zero, -omiga_0]),
        torch.stack([-omiga_1, omiga_0, zero]),
    ])
    return omiga_hat

In [None]:
from functorch import vmap
# Vectorise transfer with vmap and apply it to omiga, so each row of omiga is
# passed to transfer as one 3-vector.
batch_transfer = vmap(transfer)
batch_transfer(omiga)

In [None]:
from liegroups.torch import SO3
# Apply SO3.exp to two tangent vectors, print the input size, then apply
# SO3.log to the result.
tangent = torch.Tensor([[1, 2, 3], [0, 0, 0]])
C = SO3.exp(tangent)
print(tangent.size())
SO3.log(C)

In [None]:
# Sanity check: the natural log of 1 is 0.
np.log(1)

# Question to ask: in the original Wahba problem the action has shape (4,), but in our case the actions have shape (3, 3).