In [69]:
import torch
import torch.optim as optim
import numpy as np
from ddn.pytorch.node import *

def f(x):
    return (1 - x[0])**2 + 100 * (x[1] - x[0]**2)**2

# L-BFGS
x_lbfgs = 10*torch.ones(2, 2)
x_lbfgs.requires_grad = True

optimizer = optim.LBFGS([x_lbfgs],
                        history_size=10,
                        max_iter=4,
                        line_search_fn="strong_wolfe")
print(x_lbfgs)

h_lbfgs = []
for i in range(100):
    optimizer.zero_grad()
    objective = f(x_lbfgs)
    objective.backward()
    optimizer.step(lambda: f(x_lbfgs))
    h_lbfgs.append(objective.item())
    
print(x_lbfgs)

tensor([[10., 10.],
        [10., 10.]], requires_grad=True)


RuntimeError: grad can be implicitly created only for scalar outputs

In [None]:
import torch
import torch.optim as optim
import numpy as np
from ddn.pytorch.node import *

# class NormalizedCuts(EqConstDeclarativeNode):
class NormalizedCuts(AbstractDeclarativeNode):
    def __init__(self):
        super().__init__()
        
    def general_eigen(self, A, y):
        """ f = y^T A y """
        
        # Batch         
        yT = torch.einsum('bij->bji', y)
        # Batch matrix multiplication
        return torch.einsum('bij,bjk->bik', torch.einsum('bij,bjk->bik', yT, A), y)
        
        # For single problem...        
        # return torch.matmul(torch.matmul(y.t()), A), y)
    
    def objective(self, x, y):
        """ f(x,y) = y^T (D-W) y """
        D = torch.einsum('bij->bj', x)
        D = torch.diag_embed(D)
        L = D - x # Laplacian matrix
        return self.general_eigen(L, y)
        
#     def equality_constraints(self, x, y):
#         """ h(x,y) = y^T y = 1 """
#         return torch.matmul(y.t(), y) - 1
        
    def solve(self, x):
        x.detach()
        y = torch.rand_like(x, requires_grad=True)
        y = self._run_optimisation( x, y=y)
        return y.detach(), None
    
    def _run_optimisation(self, *xs, y):
            with torch.enable_grad():
                opt = torch.optim.LBFGS([y],
                                        lr=1.0,
                                        max_iter=1000,
                                        max_eval=None,
                                        tolerance_grad=1e-40,
                                        tolerance_change=1e-40,
                                        history_size=100,
                                        line_search_fn="strong_wolfe"
                                        )
                def reevaluate():
                    opt.zero_grad()
                    f = self.objective(*xs, y=y).sum() # sum over batch elements
                    f.backward()
                    return f
                opt.step(reevaluate)
            return y
        
# torch.set_default_tensor_type(torch.DoubleTensor)

node = NormalizedCuts()
x = torch.tensor([[[0,1,0,0], [1,0,0,3], [0,0,0,0], [0,3,0,0]]], dtype=torch.float, requires_grad=True)
print(x)
print(x.size())
y,_ = node.solve(x)
print(y)
print(node.gradient(x, y=y))

In [None]:
%debug

> [0;32m/data/gwales/anaconda3/envs/ddn/lib/python3.7/site-packages/torch/autograd/__init__.py[0m(236)[0;36mgrad[0;34m()[0m
[0;32m    234 [0;31m    return Variable._execution_engine.run_backward(
[0m[0;32m    235 [0;31m        [0moutputs[0m[0;34m,[0m [0mgrad_outputs_[0m[0;34m,[0m [0mretain_graph[0m[0;34m,[0m [0mcreate_graph[0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 236 [0;31m        inputs, allow_unused, accumulate_grad=False)
[0m[0;32m    237 [0;31m[0;34m[0m[0m
[0m[0;32m    238 [0;31m[0;34m[0m[0m
[0m
ipdb> inputs
(tensor([[[0.6699, 0.0673, 0.2774, 0.5464],
         [0.7027, 0.2216, 0.2703, 0.3664],
         [0.9152, 0.8076, 0.9612, 0.8428],
         [0.1755, 0.4587, 0.2090, 0.7177]]]),)
