In [2]:
import math
import torch
import torch.nn as nn
from kernels import RBF
from copy import deepcopy

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
K = RBF()

In [4]:
X = torch.randn(4,5)
Y = torch.randn(8,5)

In [5]:
K(X,Y).shape

torch.Size([4, 8])

In [6]:
X

tensor([[-1.4335,  0.5734,  0.0320, -1.6215, -1.4939],
        [-0.1914, -0.1111,  0.8338,  0.2083, -0.3777],
        [-0.2607, -1.0506,  0.0431, -0.1642,  0.5468],
        [-0.0816, -0.8182,  0.3603,  0.6003, -0.8229]])

In [7]:
K(X,X)

tensor([[1.0000, 0.0108, 0.0016, 0.0031],
        [0.0108, 1.0000, 0.2090, 0.5068],
        [0.0016, 0.2090, 1.0000, 0.1914],
        [0.0031, 0.5068, 0.1914, 1.0000]])

In [8]:
Y

tensor([[ 1.4444,  1.2142,  2.0666,  0.2998, -0.2833],
        [-0.9098, -0.8398, -0.5643, -1.2406,  0.7542],
        [ 0.2916,  0.9453,  0.0495,  0.4850,  0.7981],
        [-1.7931, -0.5190,  0.2280,  0.3369, -1.2903],
        [-0.0423,  2.1562,  0.0412,  0.6128, -0.2299],
        [ 0.0770, -0.0808, -0.2820,  0.0927, -0.1821],
        [-0.7883,  0.3864,  1.6741,  0.2237, -0.6312],
        [-0.5955, -0.4297,  0.8611, -1.2079, -0.0347]])

In [9]:
class VectorizedLinear(nn.Module):
    def __init__(self, in_features: int, out_features: int, ensemble_size: int):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.ensemble_size = ensemble_size

        self.weight = nn.Parameter(torch.empty(ensemble_size, in_features, out_features))
        self.bias = nn.Parameter(torch.empty(ensemble_size, 1, out_features))

        self.reset_parameters()

    def reset_parameters(self):
        # default pytorch init for nn.Linear module
        for layer in range(self.ensemble_size):
            nn.init.kaiming_uniform_(self.weight[layer], a=math.sqrt(5))

        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.weight[0])
        bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
        nn.init.uniform_(self.bias, -bound, bound)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # input: [ensemble_size, batch_size, input_size]
        # weight: [ensemble_size, input_size, out_size]
        # out: [ensemble_size, batch_size, out_size]
        return x @ self.weight + self.bias


In [10]:
class VectorizedCritic(nn.Module):
    def __init__(
        self, state_dim: int, action_dim: int, hidden_dim: int, num_critics: int
    ):
        super().__init__()
        self.critic = nn.Sequential(
            VectorizedLinear(state_dim + action_dim, hidden_dim, num_critics),
            nn.ReLU(),
            VectorizedLinear(hidden_dim, hidden_dim, num_critics),
            nn.ReLU(),
            VectorizedLinear(hidden_dim, hidden_dim, num_critics),
            nn.ReLU(),
            VectorizedLinear(hidden_dim, 1, num_critics),
        )
        # init as in the EDAC paper
        for layer in self.critic[::2]:
            torch.nn.init.constant_(layer.bias, 0.1)

        torch.nn.init.uniform_(self.critic[-1].weight, -3e-3, 3e-3)
        torch.nn.init.uniform_(self.critic[-1].bias, -3e-3, 3e-3)

        self.num_critics = num_critics

    def forward(self, state: torch.Tensor, action: torch.Tensor) -> torch.Tensor:
        # [..., batch_size, state_dim + action_dim]
        state_action = torch.cat([state, action], dim=-1)
        if state_action.dim() != 3:
            assert state_action.dim() == 2
            # [num_critics, batch_size, state_dim + action_dim]
            state_action = state_action.unsqueeze(0).repeat_interleave(
                self.num_critics, dim=0
            )
        assert state_action.dim() == 3
        assert state_action.shape[0] == self.num_critics
        # [num_critics, batch_size]
        q_values = self.critic(state_action).squeeze(-1)
        return q_values

In [11]:
critic = VectorizedCritic(
        17, 6, 256, 10
    )

In [12]:
for parameters in critic.parameters():
    parameters=parameters.reshape(10,-1)
    kernel_values = K(parameters, parameters.detach())
    print(kernel_values.shape)

torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])
torch.Size([10, 10])


In [13]:
all_parameters = torch.cat([parameters.reshape(10,-1) for parameters in critic.parameters()], dim=1)
kernel_values = K(all_parameters, all_parameters.detach())

In [14]:
kernel_grads = torch.autograd.grad(kernel_values.sum(), all_parameters)[0]

In [15]:
kernel_grads[0].mean(0).shape
# kernel_grads.shape

torch.Size([])

In [16]:
kernel_grads[0].shape
all_parameters.shape

torch.Size([10, 137985])

In [17]:
actions = torch.stack([torch.stack([torch.randn(6) for i in range(10)], dim=0) for i in range(3)], dim=0)
states = torch.stack([torch.randn(17).repeat(10, 1) for i in range(3)], dim=0)
print(actions.shape)
print(states.shape)
currentQ = torch.cat([states, actions], dim=1).mean(0, keepdim=True)
indices=currentQ.max(1).indices
states[0,15]
states[0,indices]
#indices=currentQ.max(dim=1).indices


torch.Size([3, 10, 6])
torch.Size([3, 10, 17])


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 17 but got size 6 for tensor number 1 in the list.

In [18]:
optimizer=torch.optim.Adam(
        critic.parameters(), lr=0.1
    )

In [147]:
type(optimizer.param_groups[0]['params'])
param_grads = [param.grad for param in optimizer.param_groups[0]['params']]
print(param_grads[0])
optimizer.zero_grad()
for param in optimizer.param_groups[0]['params']:
    param.grad = torch.ones_like(param)

tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        ...,

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0., 

In [151]:
optimizer.param_groups[0]["params"].grad

AttributeError: 'list' object has no attribute 'grad'

In [99]:
all_grad = torch.cat([param.grad.view(10,-1) for param in optimizer.param_groups[0]['params']], dim=1)
all_grad.shape

torch.Size([10, 137985])

In [103]:
optimizer.zero_grad()
torch.cat([param.grad.reshape(10,-1) for param in optimizer.param_groups[0]['params']], dim=1) = all_grad

SyntaxError: cannot assign to function call here. Maybe you meant '==' instead of '='? (448636623.py, line 2)

In [56]:
all_parameters = torch.cat([parameters.reshape(10,-1) for parameters in optimizer.param_groups[0]['params']], dim=1).retain_grad()

In [137]:
actions = torch.stack([torch.randn(6) for i in range(10)], dim=0)
states = torch.randn(17).repeat(10, 1)
q_target = torch.randn(10,1)
q_values = critic(states, actions)
print(q_values.shape)
# [ensemble_size, batch_size] - [1, batch_size]
critic_loss = ((q_values - q_target.view(1, -1)) ** 2).mean(dim=1).sum(dim=0)
[param.retain_grad() for param in optimizer.param_groups[0]['params']]
critic_loss.backward()

torch.Size([10, 10])


In [None]:
actions = torch.stack([torch.randn(6) for i in range(10)], dim=0)
states = torch.randn(17).repeat(10, 1)
q_values = critic(states, actions)

In [None]:
class SVGD:
    def __init__(self, kernel, optimizer):
        self.K = kernel
        self.optim = optimizer
        self.params = torch.cat(
            [
                parameters.reshape(10, -1)
                for parameters in self.optim.param_groups[0]["params"]
            ],
            dim=1,
        )

    def phi(self, loss):
        params = self.params.detach().requires_grad_(True)
        print(params.shape)
        print(loss.shape)

        score_function = torch.autograd.grad(loss, params)[0]

        K_params = self.K(params, params.detach())
        grad_K = -torch.autograd.grad(K_params.sum(), params)[0]

        phi = (K_params.detach().matmul(score_function) + grad_K) / params.size(0)

        return phi

    def step(self, loss):
        self.optim.zero_grad()
        self.params.grad = -self.phi(loss)
        self.optim.step()

In [44]:
[param.retain_grad() for param in critic.parameters()]

[None, None, None, None, None, None, None, None]

In [65]:
param = [param for param in critic.parameters()][0]
param.grad.view(1,-1)

tensor([[-0.0002,  0.0002,  0.0002,  ..., -0.0003, -0.0002,  0.0001]])

In [49]:
params = torch.cat(
    [
        parameters.reshape(10, -1)
        for parameters in optimizer.param_groups[0]["params"]
    ],
    dim=1,
).retain_grad()
print(params.shape)

NameError: name 'optimizer' is not defined

In [114]:
#params = torch.cat([parameters.reshape(10,-1) for parameters in critic.parameters()], dim=1)
#params = params.detach().requires_grad_(True)
actions = torch.stack([torch.randn(6) for i in range(10)], dim=0)
states = torch.randn(17).repeat(10, 1)
q_target = torch.randn(10,1).detach()
q_values = critic(states, actions)

# [ensemble_size, batch_size] - [1, batch_size]
critic_loss = ((q_values - q_target.view(1, -1)) ** 2).mean(dim=1).sum(dim=0)

critic_loss.backward()


In [112]:
print(critic.parameters().__next__().grad.shape)

torch.Size([10, 23, 256])


In [182]:
optimizer.param_groups[0]["params"][0].shape

torch.Size([10, 23, 256])

In [None]:

K_params = self.K(params, params.detach())
grad_K = -torch.autograd.grad(K_params.sum(), params)[0]

phi = (K_params.detach().matmul(score_function) + grad_K) / params.size(0)

In [None]:
class SVGD:
    def __init__(self, kernel, optimizer):
        self.K = kernel
        self.optim = optimizer
        self.params = torch.cat(
            [
                parameters.view(10, -1)
                for parameters in self.optim.param_groups[0]["params"]
            ],
            dim=1,
        )

    def backward(self, loss_function, **args):
        loss = loss_function(**args)
        self.optim.zero_grad()
        loss.backward()

        score_function = torch.cat(
            [
                parameters.grad.reshape(10, -1)
                for parameters in self.optim.param_groups[0]["params"]
            ],
            dim=1,
        )

        self.optim.zero_grad()
        params = self.params.detach().requires_grad_(True)

        K_params = self.K(params, params.detach())
        grad_K = -torch.autograd.grad(K_params.sum(), params)[0]

        phi = (K_params.detach().matmul(score_function) + grad_K) / params.size(0)

        phi = score_function

        self.phi = phi

        return loss

    def step(self):
        self.optim.zero_grad()
        self.params.grad = -self.phi
        self.optim.step()

In [152]:
optimizer=torch.optim.Adam(
        critic.parameters(), lr=0.1
    )

In [178]:
actions = torch.stack([torch.randn(6) for i in range(10)], dim=0)
states = torch.randn(17).repeat(10, 1)
q_target = torch.randn(10,1).detach()
q_values = critic(states, actions)

# [ensemble_size, batch_size] - [1, batch_size]
critic_loss = ((q_values - q_target.view(1, -1)) ** 2).mean(dim=1).sum(dim=0)

optimizer.zero_grad()
print(optimizer.param_groups[0]['params'][0].grad.mean())

critic_loss.backward()
print(optimizer.param_groups[0]['params'][0].grad.mean())
score_function = [deepcopy(param.grad) for param in optimizer.param_groups[0]['params']]
print(score_function[0].mean())
optimizer.zero_grad()
print(score_function[0].mean())
print(optimizer.param_groups[0]['params'][0].grad.mean())
for param, phi in zip(optimizer.param_groups[0]['params'], score_function):
    param.grad = phi
print(optimizer.param_groups[0]['params'][0].grad.mean())


tensor(0.)
tensor(6.7218e-07)
tensor(6.7218e-07)
tensor(6.7218e-07)
tensor(0.)
tensor(6.7218e-07)


In [173]:
optimizer.param_groups[0]['params'][0].view(10,-1).matmul(optimizer.param_groups[0]['params'][0].view(10,-1).T).shape

torch.Size([10, 10])

In [211]:
all_params =torch.cat([param.view(10,-1) for param in optimizer.param_groups[0]['params']], dim=1).detach().requires_grad_(True)
score = [deepcopy(param.grad) for param in optimizer.param_groups[0]['params']]


K_params = K(all_params, all_params.detach())
grad_K = -torch.autograd.grad(K_params.sum(), all_params)[0]

for layer, params in enumerate(optimizer.param_groups[0]['params']):
    shape = params.shape

    print(K_params.shape)
    print(score[layer].view(10,-1).shape)
    print(grad_K.shape)
    phi = (
        K_params.detach().matmul(score[layer].view(10, -1)) + grad_K
    ) / params.size(0)


torch.Size([10, 10])
torch.Size([10, 5888])
torch.Size([10, 137985])


RuntimeError: The size of tensor a (5888) must match the size of tensor b (137985) at non-singleton dimension 1

In [187]:
score[0].shape

torch.Size([10, 23, 256])

In [194]:
a=torch.tensor([1,2,3,4])
torch.median(a)

tensor(2)

In [None]:
class SVGD:
    def __init__(self, kernel, optimizer):
        self.K = kernel
        self.optim = optimizer
        self.params = [param for param in self.optim.param_groups[0]["params"]]

    def backward(self, loss_function, **args):
        loss = loss_function(**args)
        self.optim.zero_grad()
        loss.backward()

        score = [deepcopy(param.grad) for param in self.params]
        phi_all = []

        self.optim.zero_grad()
        for layer, params in enumerate(self.params):
            shape = params.shape
            params = params.view(10, -1).detach().requires_grad_(True)

            K_params = self.K(params, params.detach())
            grad_K = -torch.autograd.grad(K_params.sum(), params)[0]

            phi = (
                K_params.detach().matmul(score[layer].reshape(10, -1)) + grad_K
            ) / params.size(0)

            phi_all.append(phi.reshape(shape))
        # params = [param.view(10,-1).detach().requires_grad_(True) for param in self.params]

        # K_params = self.K(params, params.detach())
        # grad_K = -torch.autograd.grad(K_params.sum(), params)[0]

        # phi = (K_params.detach().matmul(score_function) + grad_K) / params.size(0)

        self.phi = phi_all

        return loss

In [233]:
optimizer.zero_grad()
print([param.grad for param in optimizer.param_groups[0]['params']][0].mean())
params = torch.cat([param.view(10,-1) for param in optimizer.param_groups[0]['params']], dim=1)
params.retain_grad()
K_params = K(params, params.detach())
K_params.sum().backward()
print(params.grad.shape)
print([param.grad for param in optimizer.param_groups[0]['params']][0].mean())
grads = [param.grad for param in optimizer.param_groups[0]['params']]

tensor(0.)
torch.Size([10, 137985])
tensor(1.2654e-13)


In [46]:
optimizer.zero_grad()
print([param.grad for param in optimizer.param_groups[0]['params']][0].mean())
params = torch.cat([param.view(10,-1) for param in optimizer.param_groups[0]['params']], dim=1)
params.retain_grad()

actions = torch.stack([torch.randn(6) for i in range(10)], dim=0)
states = torch.randn(17).repeat(10, 1)
q_target = torch.randn(10,1)
q_values = critic(states, actions)

# [ensemble_size, batch_size] - [1, batch_size]
critic_loss = ((q_values - q_target.view(1, -1)) ** 2).mean(dim=1).sum(dim=0)
critic_loss.backward()
print([param.grad for param in optimizer.param_groups[0]['params']][0].mean())
print(params.grad)

tensor(0.)
tensor(-9.4986e-07)
None


In [250]:
print(params.grad)

None


In [252]:
print([param.grad for param in optimizer.param_groups[0]['params']][0].shape)

torch.Size([10, 23, 256])


In [23]:
shapes = [param.shape for param in optimizer.param_groups[0]['params']]
shapes

[torch.Size([10, 23, 256]),
 torch.Size([10, 1, 256]),
 torch.Size([10, 256, 256]),
 torch.Size([10, 1, 256]),
 torch.Size([10, 256, 256]),
 torch.Size([10, 1, 256]),
 torch.Size([10, 256, 1]),
 torch.Size([10, 1, 1])]

In [29]:
lengths = tuple([int(param.numel()/10) for param in optimizer.param_groups[0]['params']])
lengths

(5888, 256, 65536, 256, 65536, 256, 256, 1)

In [79]:
optimizer.zero_grad()
print(optimizer.param_groups[0]['params'][0].grad.mean())

actions = torch.stack([torch.randn(6) for i in range(10)], dim=0)
states = torch.randn(17).repeat(10, 1)
q_target = torch.randn(10,1).detach()
q_values = critic(states, actions)

# [ensemble_size, batch_size] - [1, batch_size]
critic_loss = ((q_values - q_target.view(1, -1)) ** 2).mean(dim=1).sum(dim=0)

critic_loss.backward()

score = torch.cat([param.grad.view(param.size(0),-1) for param in optimizer.param_groups[0]['params']], dim=1)
print(score.mean())
optimizer.zero_grad()

params=torch.cat([param.view(param.size(0),-1) for param in optimizer.param_groups[0]['params']], dim=1).detach().requires_grad_()
K_params = K(params, params.detach())
grad_K = -torch.autograd.grad(K_params.sum(), params)[0]
phi = (
    K_params.detach().matmul(score) + grad_K
) / params.size(0)
print(phi.mean())

lengths = tuple([int(param.numel()/param.size(0)) for param in optimizer.param_groups[0]['params']])

for param, phi in zip(optimizer.param_groups[0]['params'], torch.split(phi, lengths, dim=1)):
    param.grad = phi.reshape(param.shape)
print(torch.cat([param.grad.view(10,-1) for param in optimizer.param_groups[0]['params']], dim=1).mean())

tensor(0.)
tensor(-0.0002)
tensor(-3.1934e-05)
tensor(-3.1934e-05)


In [35]:
for tensor, shape in zip(torch.split(params, lengths, dim=1), shapes):
    tensor=tensor.reshape(shape)
    print(tensor.shape)

torch.Size([10, 23, 256])
torch.Size([10, 1, 256])
torch.Size([10, 256, 256])
torch.Size([10, 1, 256])
torch.Size([10, 256, 256])
torch.Size([10, 1, 256])
torch.Size([10, 256, 1])
torch.Size([10, 1, 1])


In [86]:
torch.cat([param.grad.view(10,-1) for param in optimizer.param_groups[0]['params']], dim=1).shape

torch.Size([10, 137985])

In [1]:
import gym

In [10]:
gym.make('Hopper-v2')

Import error. Trying to rebuild mujoco_py.


DependencyNotInstalled: libglewegl.so: cannot open shared object file: No such file or directory. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)

In [9]:
%set_env LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/afs/inf.ed.ac.uk/user/s21/s2139934/.mujoco/mujoco210/bin:/usr/lib/nvidia

env: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/afs/inf.ed.ac.uk/user/s21/s2139934/.mujoco/mujoco210/bin:/usr/lib/nvidia
