In [1]:
import numpy as np
import torch
from tsGaussian.torch_tsgaussian import TangentSpaceGaussian
from stable_baselines_utils import TangentSpaceGaussian as TSG

In [2]:
# Instantiate the distribution under test; None is passed as its only constructor argument.
tg = TangentSpaceGaussian(None)

# Test liegroups.torch

In [3]:
from liegroups.torch import SO3

In [4]:
# Exponential map applied to a batch of two tangent vectors (one per row).
# The zero vector maps to the identity, as the second matrix in the output shows.
C = SO3.exp(torch.Tensor([[1,2,3],
                         [0,0,0]]))
C

<liegroups.torch.so3.SO3Matrix>
| tensor([[[-0.6949,  0.7135,  0.0893],
|          [-0.1920, -0.3038,  0.9332],
|          [ 0.6930,  0.6313,  0.3481]],
| 
|         [[ 1.0000,  0.0000,  0.0000],
|          [ 0.0000,  1.0000,  0.0000],
|          [ 0.0000,  0.0000,  1.0000]]])

# Test torch_tsgaussian sample

In [5]:
# Mean rotation: the 3x3 identity with a leading batch dimension of 1.
R_mu = torch.eye(3).unsqueeze(0)
# One unit sigma per tangent-space axis, also with a batch dimension of 1.
sigma = torch.ones(1, 3)

In [6]:
# Draw one sample around the identity mean. The output below is a pair:
# a (1, 4) tensor and a (1, 3, 3) tensor.
# No random seed is set in the visible cells, so the values change between runs.
tg.rsample(R_mu, sigma)

(tensor([[-0.3707,  0.0034,  0.4710,  0.8005]]),
 tensor([[[ 0.5564, -0.7565, -0.3437],
          [ 0.7514,  0.2815,  0.5967],
          [-0.3546, -0.5903,  0.7251]]]))

# Test torch_tsgaussian normal_term

In [7]:
# Unit sigma for each of the three components, shaped (1, 3).
sigma = torch.full((1, 3), 1.0)
sigma

tensor([[1., 1., 1.]])

In [8]:
# Evaluate normal_term for unit sigma. The value printed below, 15.7496,
# equals (2 * pi) ** 1.5.
tg.normal_term(sigma)

tensor([15.7496])

# Test torch_tsgaussian log_map

In [9]:
# Two independent copies of the identity rotation, each shaped (1, 3, 3).
R_1 = torch.eye(3)[None, :, :]
R_2 = R_1.clone()

In [10]:
# Log map between two identity rotations; the result below is the zero vector.
# NOTE(review): the inputs are (1, 3, 3) but the printed result has shape (3,),
# i.e. the batch dimension is not preserved here.
tg.log_map(R_1, R_2)

tensor([0., 0., 0.])

# Test torch_tsgaussian log_probs

In [11]:
# Inputs for log_probs: a batch of 5 samples and 5 means.
# Both must be valid rotation matrices. The mean was previously an all-zero
# matrix, which is not an element of SO(3) (its determinant is 0), so the
# identity is used instead; this evaluates the density at x == mu.
R_x = torch.eye(3).reshape((1,3,3))
R_mu = torch.eye(3).reshape((1,3,3))
R_x = R_x.repeat(5, 1, 1)
R_mu = R_mu.repeat(5, 1, 1)
# sigma keeps a batch dimension of 1, unlike the batch of 5 above.
sigma = torch.ones(3).reshape((1,3))

In [12]:
# tg.log_probs(R_x, R_mu, sigma)

In [13]:
# Convert the value -2.7568 from log space back to linear space.
# The result, ~0.0635, is the reciprocal of the normal_term value 15.7496
# printed earlier (2.7568 ~= log(15.7496)).
np.e ** (-2.7568)

0.06349462641817973

All of the torch_tsgaussian code runs now; I still need to check its correctness and turn it into a batched version.

# Test TangentSpaceGaussian actions_from_params

In [14]:
# Instantiate the stable_baselines_utils wrapper (imported as TSG); None is its only argument.
tsg = TSG(None)

In [15]:
# The wrapper's .distribution attribute holds a
# tsGaussian.torch_tsgaussian.TangentSpaceGaussian instance (see the repr below).
print(tsg.distribution)

<tsGaussian.torch_tsgaussian.TangentSpaceGaussian object at 0x7efb9a51f1c0>


In [16]:
# Show the wrapper object itself (default object repr below).
tsg

<stable_baselines_utils.TangentSpaceGaussian at 0x7efb9a51f250>

In [17]:
# Sample an action for an identity mean and unit sigma, both with batch size 1.
# The output below is a pair: a (1, 4) tensor and a (1, 3, 3) tensor.
tsg.actions_from_params(torch.eye(3).reshape((1,3,3)), torch.ones(3).reshape((1,3)))

(tensor([[ 0.1082, -0.0934, -0.4272,  0.8928]]),
 tensor([[[ 0.6176,  0.7425, -0.2593],
          [-0.7830,  0.6116, -0.1135],
          [ 0.0744,  0.2731,  0.9591]]]))

# Test TangentSpaceGaussian log_prob_from_params

In [18]:
# Shape check: repeat(2, 1, 1) on the (3, 3) identity adds a leading batch dimension of 2.
torch.eye(3).repeat(2,1,1).size()

torch.Size([2, 3, 3])

In [19]:
# Shape check: repeat(2, 1) on a length-3 vector yields a (2, 3) batch.
torch.ones(3).repeat(2,1).size()

torch.Size([2, 3])

In [20]:
# tsg.log_prob_from_params(torch.eye(3).repeat(2,1,1), torch.ones(3))

In [21]:
# Check that torch.linalg.inv accepts a batch: a random (2, 4, 4) input
# gives a (2, 4, 4) batch of inverses (output below).
x = torch.randn(2, 4, 4)
y = torch.linalg.inv(x)
y

tensor([[[-0.2954,  2.0724, -1.4958, -1.3383],
         [-0.1216, -0.6332,  0.2640, -0.5912],
         [-0.7004,  0.3414,  0.8563,  0.5351],
         [-0.0065, -1.2773,  0.4069,  1.1057]],

        [[-1.8532, -1.1162,  0.6208,  0.9714],
         [ 0.2529,  2.9824, -2.2123, -0.8470],
         [ 1.1799, -2.3310,  2.7071, -0.4571],
         [ 0.6332,  1.7400, -0.9433, -0.3315]]])

Again, the code runs, but its correctness still needs to be checked.

# Try to run training

In [28]:
import torch
from absl import app, flags
from stable_baselines3 import SAC, PPO
from envs.wahba import Wahba
from stable_baselines_utils import CustomSACPolicy, \
    CustomCNN

In [29]:
def main(argv):
    """Train SAC with the custom policy on the Wahba environment for a short run.

    Builds the environment, configures the custom feature extractor and a
    single critic that does not share the feature extractor, then trains on
    the CPU for 110 timesteps.

    Args:
        argv: Unused. Presumably present to match an absl-style entry-point
            signature (absl is imported in this notebook) — TODO confirm.
    """
    environment = Wahba()
    cpu = torch.device('cpu')

    # Feature-extractor and critic settings, gathered in one mapping.
    sac_policy_options = {
        'features_extractor_class': CustomCNN,
        'features_extractor_kwargs': {'features_dim': 256},
        'n_critics': 1,
        'share_features_extractor': False,
    }

    agent = SAC(
        CustomSACPolicy,
        environment,
        verbose = 1,
        ent_coef = 'auto_0.1',
        policy_kwargs = sac_policy_options,
        device = cpu,
    )
    agent.learn(total_timesteps = 110, eval_freq = 5, n_eval_episodes = 5)

In [30]:
# Run the short training job in-process; main() never reads argv, so None is passed.
# NOTE(review): in the log below the printed actions become NaN after the
# total_timesteps = 100 entry. The cause is not visible from this notebook — TODO investigate.
main(None)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -3.5     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 752      |
|    time_elapsed    | 0        |
|    total_timesteps | 4        |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -4.21    |
| time/              |          |
|    episodes        | 8        |
|    fps             | 700      |
|    time_elapsed    | 0        |
|    total_timesteps | 8        |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -5.19    |
| time/              |          |
|    episodes        | 12       |
|    fps             |

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -5.85    |
| time/              |          |
|    episodes        | 100      |
|    fps             | 739      |
|    time_elapsed    | 0        |
|    total_timesteps | 100      |
---------------------------------
vec:  torch.Size([1, 3]) torch.Size([1, 3, 3])
vec:  torch.Size([256, 3]) torch.Size([256, 3, 3])
actions:  tensor([[-0.6685, -0.1048, -0.1106,  0.7280],
        [-0.6261, -0.1738, -0.1281,  0.7492],
        [-0.6734, -0.0889, -0.1104,  0.7255],
        ...,
        [-0.6203, -0.1117, -0.1426,  0.7632],
        [-0.6281, -0.0836, -0.1530,  0.7583],
        [-0.6109, -0.1230, -0.1546,  0.7667]])
actions_mat:  tensor([[[ 9.5358e-01,  3.0110e-01, -4.6405e-03],
         [-2.0980e-02,  8.1802e-02,  9.9643e-01],
         [ 3.0040e-01, -9.5008e-01,  8.4322e-02]],

        [[ 9.0676e-01,  4.0959e-01, -1.0012e-01],
         [ 2.5767e-02,  1.8318e-01,  9.8274e-0

vec:  torch.Size([1, 3]) torch.Size([1, 3, 3])
vec:  torch.Size([256, 3]) torch.Size([256, 3, 3])
actions:  tensor([[nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        ...,
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan]])
actions_mat:  tensor([[[nan, nan, nan],
         [nan, nan, nan],
         [nan, nan, nan]],

        [[nan, nan, nan],
         [nan, nan, nan],
         [nan, nan, nan]],

        [[nan, nan, nan],
         [nan, nan, nan],
         [nan, nan, nan]],

        ...,

        [[nan, nan, nan],
         [nan, nan, nan],
         [nan, nan, nan]],

        [[nan, nan, nan],
         [nan, nan, nan],
         [nan, nan, nan]],

        [[nan, nan, nan],
         [nan, nan, nan],
         [nan, nan, nan]]], grad_fn=<UnsafeViewBackward>)
vec:  torch.Size([256, 3]) torch.Size([256, 3, 3])
actions:  tensor([[nan, nan, nan, nan],
        [nan, nan, nan, nan],
        [nan, nan, nan, nan],
        .

# Experiments for batch operations

In [25]:
# Unit standard deviation per axis, with a batch dimension of 1.
sigma = torch.ones(3).unsqueeze(0)
# One zero-mean Gaussian draw per axis; result has the same (1, 3) shape as sigma.
omiga = torch.normal(mean=torch.zeros_like(sigma), std=sigma)
omiga

tensor([[ 1.3748, -0.0172, -0.4849]])

In [26]:
def transfer(omiga):
    """Map 3-vectors to their 3x3 skew-symmetric ("hat") matrices.

    For a vector (w0, w1, w2) the result is

        [[  0, -w2,  w1],
         [ w2,   0, -w0],
         [-w1,  w0,   0]]

    The matrix is assembled with ``torch.stack`` rather than ``torch.tensor``.
    ``torch.tensor`` copies element values out of its inputs, which drops the
    autograd graph and cannot be traced by ``vmap``. Stacking keeps the result
    differentiable and lets the function operate on whole batches directly.

    Args:
        omiga: Tensor (or array-like) whose last dimension has size 3.
            Any number of leading batch dimensions is allowed.

    Returns:
        Tensor of shape ``omiga.shape[:-1] + (3, 3)`` with the same dtype and
        device as the input.
    """
    omiga = torch.as_tensor(omiga)
    # Split the components along the last axis so leading batch dims are kept.
    omiga_0, omiga_1, omiga_2 = omiga.unbind(dim=-1)
    zero = torch.zeros_like(omiga_0)
    # Build each row along the last axis, then stack the rows along a new
    # second-to-last axis.
    omiga_hat = torch.stack(
        [
            torch.stack([zero, -omiga_2, omiga_1], dim=-1),
            torch.stack([omiga_2, zero, -omiga_0], dim=-1),
            torch.stack([-omiga_1, omiga_0, zero], dim=-1),
        ],
        dim=-2,
    )
    return omiga_hat

In [27]:
# NOTE(review): in this environment the functorch import fails (see the
# ImportError below: undefined symbol in functorch/_C.so), so the two lines
# after it never ran. Presumably the installed functorch build does not match
# the installed torch — TODO confirm versions.
from functorch import vmap
# Vectorize `transfer` over the leading (batch) dimension of its argument.
batch_transfer = vmap(transfer)
batch_transfer(omiga)

ImportError: /home/fantasticoven/.local/lib/python3.8/site-packages/functorch/_C.so: undefined symbol: _ZNK3c104Type14isSubtypeOfExtERKS0_PSo

In [None]:
from liegroups.torch import SO3
# Batch of two tangent vectors (one per row): a non-zero one and the zero vector.
tangent_vectors = torch.Tensor([[1, 2, 3],
                                [0, 0, 0]])
# Map to rotations, report the input shape, then map back with the log.
C = SO3.exp(tangent_vectors)
print(tangent_vectors.size())
SO3.log(C)

In [None]:
# Sanity check of np.log: log(1) should be 0.
np.log(1)

# Question to ask: the original Wahba problem uses actions of shape (4,), whereas in our case the actions have shape (3, 3).