In [69]:
import torch
import torch.optim as optim
import numpy as np
from ddn.pytorch.node import *

def f(x):
    return (1 - x[0])**2 + 100 * (x[1] - x[0]**2)**2

# L-BFGS
x_lbfgs = 10*torch.ones(2, 2)
x_lbfgs.requires_grad = True

optimizer = optim.LBFGS([x_lbfgs],
                        history_size=10,
                        max_iter=4,
                        line_search_fn="strong_wolfe")
print(x_lbfgs)

h_lbfgs = []
for i in range(100):
    optimizer.zero_grad()
    objective = f(x_lbfgs)
    objective.backward()
    optimizer.step(lambda: f(x_lbfgs))
    h_lbfgs.append(objective.item())
    
print(x_lbfgs)

tensor([[10., 10.],
        [10., 10.]], requires_grad=True)


RuntimeError: grad can be implicitly created only for scalar outputs

In [70]:
import torch
import torch.optim as optim
import numpy as np
from ddn.pytorch.node import *

# class NormalizedCuts(EqConstDeclarativeNode):
class NormalizedCuts(AbstractDeclarativeNode):
    def __init__(self):
        super().__init__()
        
    def general_eigen(self, A, y):
        """ f = y^T A y """
        
        # Batch         
        yT = torch.einsum('bij->bji', y)
        # Batch matrix multiplication
        return torch.einsum('bij,bjk->bik', torch.einsum('bij,bjk->bik', yT, A), y)
        
        # For single problem...        
        # return torch.matmul(torch.matmul(y.t()), A), y)
    
    def objective(self, x, y):
        """ f(x,y) = y^T (D-W) y """
        D = torch.einsum('bij->bj', x1)
        D = torch.diag_embed(D)
        L = D - x # Laplacian matrix
        return self.general_eigen(L, y)
        
#     def equality_constraints(self, x, y):
#         """ h(x,y) = y^T y = 1 """
#         return torch.matmul(y.t(), y) - 1
        
    def solve(self, x):
        x.detach()
        y = torch.rand_like(x, requires_grad=True)
        y = self._run_optimisation( x, y=y)
        return y.detach(), None
    
    def _run_optimisation(self, *xs, y):
            with torch.enable_grad():
                opt = torch.optim.LBFGS([y],
                                        lr=1.0,
                                        max_iter=1000,
                                        max_eval=None,
                                        tolerance_grad=1e-40,
                                        tolerance_change=1e-40,
                                        history_size=100,
                                        line_search_fn="strong_wolfe"
                                        )
                def reevaluate():
                    opt.zero_grad()
                    f = self.objective(*xs, y=y).sum() # sum over batch elements
                    f.backward()
                    return f
                opt.step(reevaluate)
            return y
        
torch.set_default_tensor_type(torch.DoubleTensor)

node = NormalizedCuts()
x = torch.tensor([[[0,1,0,0], [1,0,0,3], [0,0,0,0], [0,3,0,0]]], dtype=torch.double, requires_grad=False)
print(x)
print(x.size())
y,_ = node.solve(x)
print(y)
print(node.gradient(x, y=y))

tensor([[[0., 1., 0., 0.],
         [1., 0., 0., 3.],
         [0., 0., 0., 0.],
         [0., 3., 0., 0.]]])
torch.Size([1, 4, 4])
tensor([[[0.3833, 0.6843, 0.4916, 0.8420],
         [0.8237, 0.8359, 0.0894, 0.6521],
         [0.2523, 0.0375, 0.3494, 0.7093],
         [0.6886, 1.0280, 0.1549, 0.5297]]])


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn