In [1]:
from models import loader
import numpy as np
import torch as th
import sys
sys.path.append("..")

# Maxmize $D_{KL}$
$$
\begin{aligned}
\underset{\hat{s} \in B(s)}{\arg \min } L_{\operatorname{MAD}}(\hat{s}) &=\underset{\hat{s} \in B(s)}{\arg \max } D_{\mathrm{KL}}(\pi(\cdot \mid s) \| \pi(\cdot \mid \hat{s})) \\
&=\underset{\hat{s} \in B(s)}{\arg \max }\left(\pi_{\theta_{\pi}}(s)-\pi_{\theta_{\pi}}(\hat{s})\right)^{\top} \Sigma_s^{-1}\left(\pi_{\theta_{\pi}}(s)-\pi_{\theta_{\pi}}(\hat{s})\right)
\end{aligned}
$$

In [2]:
pi_net = loader.get_actor_net("AntBulletEnv-v0", "ppo2")

if pi_net.logstd is not None:
    sigma_inv = 1 / th.exp(pi_net.logstd)


def loss_fn(s_0, s_hat):
    diff = pi_net(s_0) - pi_net(s_hat)
    return -th.sum(diff*sigma_inv*diff)


s_0 = th.zeros(28)
s_hat = th.ones(28)
loss_fn(s_0, s_hat)

tensor(-11.3996, grad_fn=<NegBackward>)

## FGSM

In [10]:
s_hat = th.nn.Parameter(s_0 + delta, requires_grad=True) # adding delta because initial the gradient is 0
loss = loss_fn(s_0, s_hat)
loss.backward()

np.sign(s_hat.grad.detach().cpu().numpy())

array([ 1., -1.,  1.,  1., -1.,  1., -1., -1., -1.,  1., -1., -1.,  1.,
       -1.,  1., -1.,  1., -1., -1.,  1., -1.,  1., -1., -1., -1., -1.,
       -1.,  1.], dtype=float32)

## PGD

In [15]:
s_hat = th.nn.Parameter(s_0+delta, requires_grad=True) # adding delta because initial the gradient is 0
optimizer = th.optim.Adam([s_hat], lr=1e-3)

for _ in range(100):
    optimizer.zero_grad()
    loss = loss_fn(s_0, s_hat)
    loss.backward()
    optimizer.step()
    s_hat.data = th.min(s_0 + l_inf_norm, th.max(s_0 - l_inf_norm, s_hat.data))

s_hat

Parameter containing:
tensor([0.9925, 1.0075, 0.9925, 0.9925, 1.0075, 0.9925, 1.0075, 1.0075, 1.0075,
        0.9925, 1.0075, 1.0075, 0.9925, 1.0075, 0.9925, 1.0075, 0.9925, 1.0075,
        1.0075, 0.9925, 1.0075, 1.0075, 1.0075, 1.0075, 1.0075, 0.9925, 1.0075,
        0.9925], requires_grad=True)