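$K$-Lipschitz continuity: a function $f$ is $K$-Lipschitz if $\|f(x)-f(y)\|_2\leq K\|x-y\|_2$ for all $x, y$; $\|f\|_{\text{Lip}}$ denotes the smallest such $K$.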

The spectral norm of a matrix $A$ (its largest singular value):
$$\sigma(A)=\max_{\|h\|_2\leq 1} \|Ah\|_2.$$

For a linear layer $g(h)=Wh$, $\|g\|_{\text{Lip}}=\sup_h\sigma(\nabla g(h))=\sup_h\sigma(W)=\sigma(W)$.

Normalize $W$ by $\bar{W}_{\text{SN}}(W)=W/\sigma(W)$.

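Power iteration is used to estimate $\sigma(W)$. For a matrix $A$ with a dominant eigenvalue, the Rayleigh quotient of the power iterate $b_k$ approximates the spectral radius:

$\rho (A)=\max \left\{|\lambda _{1}|,\dotsc ,|\lambda _{n}|\right\}\approx\left|{\frac {b_{k}^{\top }Ab_{k}}{b_{k}^{\top }b_{k}}}\right|=\left|{\frac {b_{k+1}^{\top }b_{k}}{b_{k}^{\top }b_{k}}}\right|$ for large $k$, where $b_{k+1}=Ab_{k}$ is the iterate before normalization. Since $\sigma(W)=\sqrt{\rho(W^\top W)}$, the same iteration applied alternately with $W^\top$ and $W$ yields the largest singular value.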

For each update and each layer l:

Apply the power iteration method to an unnormalized weight $W^l$:
$$\tilde{v}_l\leftarrow (W^l)^\top \tilde{u}_l/\|(W^l)^\top \tilde{u}_l\|_2$$
$$\tilde{u}_l\leftarrow (W^l) \tilde{v}_l/\|(W^l) \tilde{v}_l\|_2$$

Estimate the spectral norm and use it to compute $\bar{W}^l_{\text{SN}}(W^l)=W^l/\sigma(W^l)$:
$$\sigma(W^l)=\tilde{u}_l^\top W^l \tilde{v}_l$$

Update $W^l$ with SGD on minibatch:
$$W^l\leftarrow W^l-\alpha \nabla_{W^l} loss(\bar{W}^l_{\text{SN}}(W^l))$$

In [1]:
import torch
from torch.optim.optimizer import Optimizer, required

from torch.autograd import Variable
import torch.nn.functional as F
from torch import nn
from torch import Tensor
from torch.nn import Parameter

def l2normalize(v, eps=1e-12):
    """Return ``v`` divided by its norm plus a small constant.

    Args:
        v: Object exposing ``norm()`` and supporting division (the rest of
            this file passes torch tensors, for which ``norm()`` with no
            arguments is the 2-norm over all elements).
        eps: Constant added to the norm before dividing, so that an
            all-zero ``v`` does not cause a division by zero.

    Returns:
        ``v / (v.norm() + eps)``.
    """
    denominator = v.norm() + eps
    return v / denominator


class SpectralNorm(nn.Module):
    """Wrap a module so that one of its weights is spectrally normalized.

    On construction the wrapped module's parameter ``name`` is replaced by
    three parameters registered on that module:

    * ``<name>_bar`` -- the trainable, unnormalized weight;
    * ``<name>_u`` / ``<name>_v`` -- power-iteration vectors
      (``requires_grad=False``).

    Every call to :meth:`forward` refreshes ``u`` and ``v``, estimates the
    spectral norm ``sigma = u^T W v`` of the weight viewed as a
    ``(shape[0], -1)`` matrix, and assigns ``<name>_bar / sigma`` to the
    wrapped module's ``<name>`` attribute before running the module.

    Args:
        module: The ``nn.Module`` whose weight is normalized.
        name: Name of the parameter on ``module`` to normalize.
        power_iterations: Number of power-iteration steps per forward call.
    """

    def __init__(self, module, name='weight', power_iterations=1):
        super(SpectralNorm, self).__init__()
        self.module = module
        self.name = name
        self.power_iterations = power_iterations
        # Skip re-registration if the module already carries the helper
        # parameters (e.g. it was wrapped before).
        if not self._made_params():
            self._make_params()

    def _update_u_v(self):
        """Run power iteration and install the normalized weight on the module."""
        u = getattr(self.module, self.name + "_u")
        v = getattr(self.module, self.name + "_v")
        w = getattr(self.module, self.name + "_bar")

        height = w.data.shape[0]
        # Flatten everything but the first dimension once, outside the loop.
        w_mat = w.view(height, -1)
        w_mat_data = w_mat.data
        # The iteration works on ``.data`` so that updating u and v is not
        # tracked by autograd.
        for _ in range(self.power_iterations):
            v.data = l2normalize(torch.mv(torch.t(w_mat_data), u.data))
            u.data = l2normalize(torch.mv(w_mat_data, v.data))

        # sigma is computed on the tracked tensors so gradients reach
        # ``<name>_bar`` through the normalization as well.
        # NOTE: there is no guard against sigma == 0 (e.g. an all-zero weight).
        sigma = u.dot(w_mat.mv(v))
        setattr(self.module, self.name, w / sigma.expand_as(w))

    def _made_params(self):
        """Return True if ``<name>_u``, ``<name>_v`` and ``<name>_bar`` all exist."""
        return all(
            hasattr(self.module, self.name + suffix)
            for suffix in ("_u", "_v", "_bar")
        )

    def _make_params(self):
        """Replace the module's ``<name>`` parameter by ``_u``, ``_v`` and ``_bar``."""
        w = getattr(self.module, self.name)

        height = w.data.shape[0]
        width = w.view(height, -1).data.shape[1]

        # Random unit vectors, allocated via ``w.data.new`` so they inherit
        # the weight's tensor type.
        u = Parameter(w.data.new(height).normal_(0, 1), requires_grad=False)
        v = Parameter(w.data.new(width).normal_(0, 1), requires_grad=False)
        u.data = l2normalize(u.data)
        v.data = l2normalize(v.data)
        w_bar = Parameter(w.data)

        # Drop the original parameter; ``_update_u_v`` later sets a plain
        # attribute under the same name on each forward call.
        del self.module._parameters[self.name]

        self.module.register_parameter(self.name + "_u", u)
        self.module.register_parameter(self.name + "_v", v)
        self.module.register_parameter(self.name + "_bar", w_bar)

    def forward(self, *args, **kwargs):
        """Refresh the normalized weight, then run the wrapped module.

        Positional and keyword arguments are passed through unchanged. The
        module is invoked via ``__call__`` rather than ``.forward`` so that
        hooks registered on it still run.
        """
        self._update_u_v()
        return self.module(*args, **kwargs)