This was an example of trying to use scipy to minimize, before realising it only works for scalar valued functions

In [9]:
# SCIPY SOLVE - doesn't work as it only works for SCALAR VALUED FUNCTIONS

import torch
import torch.nn as nn
import scipy.optimize as opt
import numpy as np
import sys
sys.path.append("../")
from ddn.pytorch.node import *
import warnings
warnings.filterwarnings('ignore')

class NormalizedCuts(EqConstDeclarativeNode):
    """
    A declarative node to embed Normalized Cuts into a Neural Network
    
    Normalized Cuts and Image Segmentation https://people.eecs.berkeley.edu/~malik/papers/SM-ncut.pdf
    Shi, J., & Malik, J. (2000)
    """
    def __init__(self):
        super().__init__()
        
    def objective(self, x, y):
        """
        f(x,y) = y^T(D-W)y / y^TDy
        for W = x
        """
        # Ensure correct size and shape of y... scipy minimise flattens y         
        N = x.size(dim=0)
        y = torch.tensor(y).reshape(N,N)
        
        # x is an NxN symmetrical matrix with W(i,j) = w_ij
        D = x.sum(1).diag() # D is an NxN diagonal matrix with d on diagonal, for d(i) = sum_j(w(i,j))
        ONE = torch.ones(x.size(dim=0),1)   # Nx1 vector of all ones
        L = D - x
        
        top_a = torch.mm(torch.t(y), L)
        top_b = torch.mm(top_a, y)
        
        bot_a = torch.mm(torch.t(y), D)
        bot_b = torch.mm(bot_a, y)
        
        
        f = torch.div(top_b, bot_b)
        print(f)
        return f
    
    def equality_constraints(self, x, y):
        """
        subject to y^TD1=0
        """
        # Ensure correct size and shape of y... scipy minimise flattens y         
        N = x.size(dim=0)
        y = torch.tensor(y).reshape(N,N)
        
        #x is an NxN symmetrical matrix with W(i,j) = w_ij
        D = x.sum(1).diag() # D is an NxN diagonal matrix with d on diagonal, for d(i) = sum_j(w(i,j))
        ONE = torch.ones(N,1)   # Nx1 vector of all ones
        
        a = torch.mm(torch.t(y),D)
        b = torch.mm(a,ONE)
        return b

    def solve(self, x):
        N = x.size(dim=0)
        x0 = torch.ones(N,N)
        # requires scipy 1.4.1, otherwise you recieve strange errors and minimisation doesn't work
        result = opt.minimize(lambda y: self.objective(x, y), # objective
                              x0, # initial guess
                              constraints={'type': 'eq', 'fun': lambda y: self.equality_constraints(x,y)}, # constraint
                              method='BFGS',
                              options={'disp': True}) #print output
        
        y = torch.tensor(result.x).reshape(N,N)
        print(y)
        return torch.tensor(result.x), None
    
class Net(nn.Module):
    def __init__(self):
        """
        instatiate parameter
        """
        super().__init__()
    
# GPU ID to use
gpu = 1

# Create model, set to use GPU
model = Net()
torch.set_default_tensor_type(torch.DoubleTensor)

node = NormalizedCuts()
x = torch.tensor([[0,1,0], [2,0,3], [0,4,0]]).double()
y,_ = node.solve(x)
print(y)
print("done")
# torch.cuda.set_device(gpu)
# model = model.cuda(gpu)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[-1.4901e-09,  0.0000e+00,  0.0000e+00],
        [-1.4901e-09,  0.0000e+00,  0.0000e+00],
        [-1.4901e-09,  0.0000e+00,  0.0000e+00]])


ValueError: The user-provided objective function must return a scalar value.

In [72]:
import torch

W = torch.tensor([[0.,1.,0.], [2.,0.,3.], [0.,4.,0.]])

print(A.size(dim=0))
print(A[1,0])

print("~~")
W = A   # W is an NxN symmetrical matrix with W(i,j) = w_ij
D = W.sum(1).diag() # D is an NxN diagonal matrix with d on diagonal, for d(i) = sum_j(w(i,j))
ONE = torch.ones(x.size(dim=0),1)   # Nx1 vector of all ones

print(type(torch.t(W)))
print(type(torch.mm(torch.t(W), D)))
print(type(ONE))

print(ONE)
print(torch.t(W) * D * ONE)

print(torch.mm(torch.mm(torch.t(W), D), ONE))

3
tensor(2.)
~~
tensor([[1., 0., 0.],
        [0., 5., 0.],
        [0., 0., 4.]])
tensor([[0., 1., 0.],
        [2., 0., 3.],
        [0., 4., 0.]])
tensor([[ 3.,  0.,  0.],
        [ 0., 10.,  0.],
        [ 0.,  0.,  7.]])
~~
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
tensor([[1.],
        [1.],
        [1.]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[10.],
        [17.],
        [15.]])


This was an attempt to move to pytorch.optim to solve the solution, however this
- produced the incorrect solution (as you cannot supply constraints)

In [10]:
# Pytorch optim solving

import torch
import torch.nn as nn
import scipy.optimize as opt
import numpy as np
import sys
sys.path.append("../")
from ddn.pytorch.node import *
import warnings
warnings.filterwarnings('ignore')

class NormalizedCuts(AbstractDeclarativeNode):
    """
    A declarative node to embed Normalized Cuts into a Neural Network
    
    Normalized Cuts and Image Segmentation https://people.eecs.berkeley.edu/~malik/papers/SM-ncut.pdf
    Shi, J., & Malik, J. (2000)
    """
    def __init__(self):
        super().__init__()
        
    def objective(self, x, y):
        """
        f(x,y) = y^T(D-W)y / y^TDy
        for W = x
        """
        W = x   # W is an NxN symmetrical matrix with W(i,j) = w_ij
                
        D = W.sum(1).diag() # D is an NxN diagonal matrix with d on diagonal, for d(i) = sum_j(w(i,j))
        ONE = torch.ones(x.size(dim=0))   # Nx1 vector of all ones
        L = D - W
        
        return (torch.mm(torch.mm((torch.t(y),L),y))/ (torch.mm(torch.transpose(y),D),y))
    
    def equality_constraints(self, x, y):
        """
        subject to y^TD1=0
        """
        W = x   # W is an NxN symmetrical matrix with W(i,j) = w_ij
        D = W.sum(1).diag() # D is an NxN diagonal matrix with d on diagonal, for d(i) = sum_j(w(i,j))
        ONE = torch.ones(x.size(dim=0))   # Nx1 vector of all ones
        return torch.mm(torch.mm((torch.t(y),D),ONE))

    def solve(self, x):
        
        
        return y, None
    
    def _run_optimisation(self, *xs, y):
        with torch.enable_grad():
            opt = torch.optim.LBFGS([y],
                                    lr=1.0,
                                    max_iter=1000,
                                    max_eval=None,
                                    tolerance_grad=1e-40,
                                    tolerance_change=1e-40,
                                    history_size=100,
                                    line_search_fn="strong_wolfe"
                                    )
            def reevaluate():
                opt.zero_grad()
                f = self.objective(*xs, y=y).sum() # sum over batch elements
                f.backward()
                return f
            opt.step(reevaluate)
        return y

node = NormalizedCuts()
x = torch.tensor([[0,1,0], [2,0,3], [0,4,0]])
y,_ = node.solve(x)
print(y)

tensor([ 0.0000, -0.5774, -0.2852, -0.7651])


This was an attempt to solve iteratively, using all tracked functions from autograd.numpy or pytorch

The idea being it could track the iterative steps and still calculate the gradient (in hindsight not a great idea)

In [11]:
import autograd.numpy as np
import torch
from autograd import grad, jacobian

def gradient(f, x, y):
    fY = grad(f,1)
    fYY = jacobian(fY, 1)
    fXY = jacobian(fY, 0)
    
    return -1.0 * np.linalg.solve(fYY(x,y), fXY(x,y))


x = torch.tensor([[0,1,0,0], [1,0,0,3], [0,0,0,0], [0,3,0,0]]).double()

def objective(x, y):
        # x is an NxN symmetrical matrix with W(i,j) = w_ij
        D = x.sum(0).diag() # D is an NxN diagonal matrix with d on diagonal, for d(i) = sum_j(w(i,j))
        ONE = torch.ones(x.size(dim=0),1)   # Nx1 vector of all ones
        L = D - x
        
        # L_sym = torch.mm(torch.mm(torch.diag(torch.pow(torch.diag(D),-0.5)),L),torch.diag(torch.pow(torch.diag(D),-0.5)))
        return torch.mm(torch.mm(torch.t(y),L), y)
    
def solve(x):
        D = x.sum(0).diag() # D is an NxN diagonal matrix with d on diagonal, for d(i) = sum_j(w(i,j))
        ONE = torch.ones(x.size(dim=0),1)   # Nx1 vector of all ones
        L = D - x
        
        val, vec = torch.linalg.eigh(L)
        seen = {}
        uniques = []
        for (x,y) in zip(val, vec):
            if x in seen:
                continue
            seen[x] = 1
            uniques.append((x,y))
        fiedler = sorted(uniques)[1][1]
        return fiedler, _
    
x = torch.tensor([[0,1,0,0], [1,0,0,3], [0,0,0,0], [0,3,0,0]]).double()
y,_ = solve(x)
type(y)
print(y)
gradient(objective(x,y),x ,y)

tensor([ 0.0000, -0.5774, -0.2852, -0.7651])


RuntimeError: self must be a matrix

In [8]:
# This semi works, but the solution is wrong and the gradient crashes half way

import autograd.numpy as np
import torch
from autograd import grad, jacobian

def gradient(f, x, y):
    fY = grad(f,1)
    fYY = jacobian(fY, 1)
    fXY = jacobian(fY, 0)
    
    return -1.0 * np.linalg.solve(fYY(x,y), fXY(x,y))

n = 5
M = np.random.uniform(0,255,(n,n))
symm = M@M.T
# test for symmetry
print(symm)

torch.set_default_tensor_type(torch.FloatTensor)
x = torch.tensor(symm, requires_grad=True)

D = x.sum(0).diag() # D is an NxN diagonal matrix with d on diagonal, for d(i) = sum_j(w(i,j))
ONE = torch.ones(x.size(dim=0),1)   # Nx1 vector of all ones
L = D - x

# L.backward(x)

L.t()


x = torch.tensor([[0,1,0,0], [1,0,0,3], [0,0,0,0], [0,3,0,0]]).double()
D = x.sum(0).diag()
print(D)

x1 = torch.tensor([[[0,1,0,0], [1,0,0,3], [0,0,0,0], [0,3,0,0]]]).double()
D = torch.einsum('bij->bj', x1)
d1, d2 = D.size()
D = torch.diag_embed(D)
print(D)

[[ 98854.2753754  104142.66221626  92460.67919163 109064.65407346
  107222.1442791 ]
 [104142.66221626 141858.63181405 103985.84690384 157299.66602452
  131495.35491065]
 [ 92460.67919163 103985.84690384 116752.6405859  139256.92311514
   95619.87134508]
 [109064.65407346 157299.66602452 139256.92311514 206826.68273427
  131264.5428598 ]
 [107222.1442791  131495.35491065  95619.87134508 131264.5428598
  135106.71342282]]
tensor([[1., 0., 0., 0.],
        [0., 4., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 3.]], dtype=torch.float64)
tensor([[[1., 0., 0., 0.],
         [0., 4., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 3.]]], dtype=torch.float64)
