In [1]:
import numpy as np
import torch
from tsGaussian.torch_tsgaussian import TangentSpaceGaussian
from stable_baselines_utils import TangentSpaceGaussian as TSG
from pytorch3d.transforms.so3 import (
    so3_exp_map,
    so3_relative_angle,
)

  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'pytorch3d'

In [2]:
pip show pytorch3d

[0mNote: you may need to restart the kernel to use updated packages.


In [3]:
tg = TangentSpaceGaussian(None)

# Test liegroup torch

In [4]:
from liegroups.torch import SO3

In [6]:
# Exponential map of a batch of two 3-vectors; the all-zero vector maps to the
# identity matrix (second matrix in the displayed output).
C = SO3.exp(torch.Tensor([[1,2,3],
                         [0,0,0]]))
C

<liegroups.torch.so3.SO3Matrix>
| tensor([[[-0.6949,  0.7135,  0.0893],
|          [-0.1920, -0.3038,  0.9332],
|          [ 0.6930,  0.6313,  0.3481]],
| 
|         [[ 1.0000,  0.0000,  0.0000],
|          [ 0.0000,  1.0000,  0.0000],
|          [ 0.0000,  0.0000,  1.0000]]])

# Test torch_tsgaussian sample

In [7]:
# Identity matrix with a leading batch dimension, shape (1, 3, 3).
R_mu = torch.eye(3).reshape((1,3,3))
# One row of ones, shape (1, 3); passed to rsample as sigma in the next cell.
sigma = torch.ones(3).reshape((1,3))

In [8]:
R_quat, R_x = tg.rsample(R_mu, sigma)

torch.Size([1, 3, 3])


In [9]:
# Orthogonality check: batched R_x^T @ R_x; the displayed result is the identity up to ~1e-8.
torch.bmm(torch.transpose(R_x, 1, 2), R_x)

tensor([[[1.0000e+00, 0.0000e+00, 1.4901e-08],
         [0.0000e+00, 1.0000e+00, 0.0000e+00],
         [1.4901e-08, 0.0000e+00, 1.0000e+00]]])

# Test torch_tsgaussian normal_term

In [10]:
sigma = torch.ones(3).reshape((1,3))
sigma

tensor([[1., 1., 1.]])

In [11]:
tg.normal_term(sigma)

tensor([15.7496])

# Test torch_tsgaussian log_map

In [12]:
R_1 = torch.eye(3).reshape((1, 3, 3))
R_2 = torch.eye(3).reshape((1, 3, 3))

In [13]:
tg.log_map(R_1, R_2)

tensor([0., 0., 0.])

# Test torch_tsgaussian log_probs

In [14]:
# Build batch-of-5 inputs: R_x is five identity matrices, R_mu five copies of the
# matrix below, and sigma a single (1, 3) row of ones.
R_x = torch.eye(3).reshape((1,3,3))
# NOTE(review): R_mu is the all-zeros 3x3 matrix, which is not a rotation matrix
# (R^T R != I) — confirm this is intended before re-enabling log_probs below.
R_mu = torch.zeros(3,3).reshape((1,3,3))
R_x = R_x.repeat(5, 1, 1)
R_mu = R_mu.repeat(5, 1, 1)
sigma = torch.ones(3).reshape((1,3))

In [15]:
# tg.log_probs(R_x, R_mu, sigma)

In [16]:
np.e ** (-2.7568)

0.06349462641817973

All of the torch_tsgaussian code runs now; we still need to check its correctness and turn it into a batched version.

# Test TangentSpaceGaussian actions_from_params

In [17]:
tsg = TSG(None)

In [18]:
print(tsg.distribution)

<tsGaussian.torch_tsgaussian.TangentSpaceGaussian object at 0x7f313fb89a30>


In [19]:
tsg

<stable_baselines_utils.TangentSpaceGaussian at 0x7f313fc64a60>

In [20]:
tsg.actions_from_params(torch.eye(3).reshape((1,3,3)), torch.ones(3).reshape((1,3)))

torch.Size([1, 3, 3])


(tensor([[-0.3156, -0.1786, -0.0179,  0.9318]]),
 tensor([[[ 0.9356,  0.1460, -0.3216],
          [ 0.0795,  0.8001,  0.5945],
          [ 0.3441, -0.5818,  0.7370]]]))

# Test TangentSpaceGaussian log_prob_from_params

In [21]:
torch.eye(3).repeat(2,1,1).size()

torch.Size([2, 3, 3])

In [22]:
torch.ones(3).repeat(2,1).size()

torch.Size([2, 3])

In [23]:
# tsg.log_prob_from_params(torch.eye(3).repeat(2,1,1), torch.ones(3))

In [24]:
x = torch.randn(2, 4, 4)
y = torch.linalg.inv(x)
y

tensor([[[-0.4389,  0.0445,  0.1714, -0.0683],
         [-0.0901, -0.6699,  0.1211,  0.4760],
         [ 0.4391, -0.5234, -0.1454, -0.1739],
         [-0.1612,  0.4287, -0.6449, -0.0786]],

        [[ 1.3434, -2.5311, -1.5495,  0.5026],
         [-1.0322,  0.5764,  0.8813, -0.7227],
         [ 1.0227, -2.7010, -3.9711,  1.9032],
         [ 0.1490, -1.1189, -0.8147,  0.7077]]])

Again, the code runs, but its correctness still needs to be checked.

# Try to run training

In [25]:
import torch
from absl import app, flags
from stable_baselines3 import SAC, PPO
from envs.wahba import Wahba
from stable_baselines_utils import CustomSACPolicy, \
    CustomCNN

In [30]:
def main(argv):
    """Build a SAC model with the custom policy on the Wahba env and train it.

    Args:
        argv: Not read by the body; callers in this notebook pass ``None``.

    Returns:
        None. Training logs are written under ``./sac`` via ``tensorboard_log``.
    """
    wahba_env = Wahba()
    cpu = torch.device('cpu')

    # Keyword arguments forwarded to the policy constructor: a separate
    # (non-shared) CustomCNN feature extractor and a single critic.
    sac_policy_options = {
        'features_extractor_class': CustomCNN,
        'features_extractor_kwargs': {'features_dim': 256},
        'n_critics': 1,
        'share_features_extractor': False,
    }

    agent = SAC(
        CustomSACPolicy,
        wahba_env,
        verbose=1,
        ent_coef=0.0,  # fixed entropy coefficient of zero
        policy_kwargs=sac_policy_options,
        device=cpu,
        tensorboard_log='./sac',
    )
    agent.learn(total_timesteps=50000, eval_freq=100, n_eval_episodes=100)

In [31]:
from torch import autograd
# Run training under autograd anomaly detection so that a backward function
# returning NaN raises a RuntimeError naming that function (see the output below).
with autograd.detect_anomaly():
    main(None)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./sac/SAC_7
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -5.98    |
| time/              |          |
|    episodes        | 4        |
|    fps             | 602      |
|    time_elapsed    | 0        |
|    total_timesteps | 4        |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.39    |
| time/              |          |
|    episodes        | 8        |
|    fps             | 659      |
|    time_elapsed    | 0        |
|    total_timesteps | 8        |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -6.08    |
| time/              |          |
|    episodes        | 12       |

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -5.9     |
| time/              |          |
|    episodes        | 100      |
|    fps             | 802      |
|    time_elapsed    | 0        |
|    total_timesteps | 100      |
---------------------------------


  with autograd.detect_anomaly():


vec:  torch.Size([1, 3]) torch.Size([1, 3, 3])
torch.Size([1, 3, 3])
vec:  torch.Size([256, 3]) torch.Size([256, 3, 3])
torch.Size([256, 3, 3])
actions:  tensor([[-0.1445, -0.0889,  0.9838,  0.0571],
        [-0.1243, -0.0774,  0.9877,  0.0543],
        [-0.1665, -0.1016,  0.9788,  0.0620],
        ...,
        [-0.1642, -0.0979,  0.9797,  0.0609],
        [-0.1179, -0.0719,  0.9891,  0.0511],
        [-0.1525, -0.0905,  0.9825,  0.0572]])
actions_mat:  tensor([[[-0.4943, -0.8526,  0.1697],
         [-0.7384,  0.3088, -0.5995],
         [-0.4587,  0.4217,  0.7822]],

        [[-0.4997, -0.8383,  0.2181],
         [-0.7296,  0.2717, -0.6276],
         [-0.4668,  0.4728,  0.7474]],

        [[-0.4909, -0.8613,  0.1312],
         [-0.7372,  0.3304, -0.5894],
         [-0.4642,  0.3861,  0.7971]],

        ...,

        [[-0.5085, -0.8514,  0.1287],
         [-0.7297,  0.3467, -0.5893],
         [-0.4572,  0.3936,  0.7976]],

        [[-0.5146, -0.8289,  0.2193],
         [-0.7267,  0.2860

  File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/fantasticoven/.local/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/home/fantasticoven/.local/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance
    app.start()
  File "/home/fantasticoven/.local/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 707, in start
    self.io_loop.start()
  File "/home/fantasticoven/.local/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
    self._run_once()
  File "/usr/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
    handle._run()
  File "/usr/lib/python3.8/asyncio/events.

RuntimeError: Function 'MseLossBackward' returned nan values in its 0th output.

In [28]:
%load_ext tensorboard
import tensorflow as tf
import numpy as np
import datetime
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing import event_accumulator

In [29]:
%tensorboard --logdir ./sac

# Experiments for batch operations

In [None]:
# Draw one 3-vector from a zero-mean normal whose per-component std is sigma;
# both the mean and std tensors have shape (1, 3), and so does the sample.
sigma = torch.ones(1, 3)
omiga = torch.normal(torch.zeros(1, 3), sigma)
omiga

In [None]:
def transfer(omiga):
    """Map a 3-vector to its 3x3 skew-symmetric ("hat") matrix.

    For ``omiga = (w0, w1, w2)`` the result is::

        [[  0, -w2,  w1],
         [ w2,   0, -w0],
         [-w1,  w0,   0]]

    The matrix is assembled with ``torch.stack`` instead of ``torch.tensor``:
    ``torch.tensor`` copies element values into a fresh tensor, which drops
    them from the autograd graph and is not usable under ``vmap``. Stacking
    keeps gradients, dtype and device of the input.

    Args:
        omiga: Tensor whose last dimension has size 3. A plain ``(3,)`` vector
            behaves as before; any leading dimensions are treated as batch
            dimensions.

    Returns:
        Tensor of shape ``(*omiga.shape[:-1], 3, 3)``.
    """
    omiga_0, omiga_1, omiga_2 = omiga[..., 0], omiga[..., 1], omiga[..., 2]
    # Zero entries share dtype/device (and batch shape) with the components.
    zero = torch.zeros_like(omiga_0)
    # Row-major list of the nine matrix entries, stacked on a new last axis.
    flat = torch.stack(
        [
            zero, -omiga_2, omiga_1,
            omiga_2, zero, -omiga_0,
            -omiga_1, omiga_0, zero,
        ],
        dim=-1,
    )
    omiga_hat = flat.reshape((*omiga.shape[:-1], 3, 3))
    return omiga_hat

In [None]:
from functorch import vmap
# vmap maps transfer over the leading dimension of omiga, so each row is
# handed to transfer as a single 3-vector.
batch_transfer = vmap(transfer)
batch_transfer(omiga)

In [None]:
from liegroups.torch import SO3
C = SO3.exp(torch.Tensor([[1,2,3],
                          [0,0,0]]))
print(torch.Tensor([[1,2,3],
                          [0,0,0]]).size())
SO3.log(C)

In [None]:
np.log(1)

# Question to ask: in the original Wahba problem the action has shape (4,), but in our case the actions have shape (3,3).