Goal
- Find the eigenvector associated with the smallest eigenvalue $\lambda$ for the NC (normalized cuts) node

Problem
- Some methods are too slow and/or crash in certain cases
---
Input
 - Positive definite matrix (symmetric)

Output
 - Graph cut of the matrix (corresponds to the eigenvector of the smallest eigenvalue)
---
Must
- Handle image-sized inputs
- Have reproducible results (fixed seed)

In [1]:
import numpy as np
from scipy import linalg
import torch
## TODO: Begin with a simple 'image' that has a known cut, this way it can be tested if correct
## May be able to compare to sklearn.cluster.SpectralClustering (https://scikit-learn.org/stable/modules/generated/sklearn.cluster.SpectralClustering.html?highlight=lobpcg#r5f6cbeb1558e-4)

# --- Reproducibility and problem size ---------------------------------------
n = 10     # the toy image below is n x n pixels
seed = 123
dtype = np.float64  # NOTE: currently has no effect (random.rand does not accept a dtype)
np.random.seed(seed)
# TODO: possibly generate inputs of different sparsities as well, since
#       realistic inputs will be somewhat sparse.

# Disabled alternative: random pixel values instead of the hand-made image.
# input = np.random.randint(0,255, n)

# TODO: test larger sizes (fully and correctly formed symmetric positive
#       definite but random matrices, and fully random matrices).

# Pretend image with an obvious two-region split: a dark region (value 1) on
# the left and a bright region (value 255) on the right. It is written row by
# row for readability and then flattened to the 1-D vector of n*n pixels.
input = np.array([
    [1, 1, 1, 1, 255, 255, 255, 255, 255, 255],
    [1, 1, 1, 1, 255, 255, 255, 255, 255, 255],
    [1, 1, 1, 1, 255, 255, 255, 255, 255, 255],
    [1, 1, 1, 1, 255, 255, 255, 255, 255, 255],
    [1, 1, 1, 1, 255, 255, 255, 255, 255, 255],
    [1, 1, 1, 1, 255, 255, 255, 255, 255, 255],
    [1, 1, 1, 1, 1, 255, 255, 255, 255, 255],
    [1, 1, 1, 1, 1, 255, 255, 255, 255, 255],
    [1, 1, 1, 1, 1, 1, 255, 255, 255, 255],
    [1, 1, 1, 1, 1, 1, 1, 1, 255, 255],
]).ravel()

# Build the (n*n) x (n*n) Fiedler matrix of the pixel values, add a leading
# batch axis of size 1, and hand it to torch.
# TODO: replace this with a real image and real values taken from it; for now
#       this stand-in should be fine.
# NOTE(review): the original note described this matrix as symmetric positive
#       semi-definite. A Fiedler matrix F[i, j] = |a[i] - a[j]| is symmetric
#       but has a zero diagonal (zero trace), so it can only be positive
#       semi-definite if it is all zeros -- confirm what the NC node requires.
A = torch.from_numpy(linalg.fiedler(input).reshape(1, n * n, n * n))

for _shown in (A, input.shape, A.shape):
    print(_shown)



tensor([[[  0,   0,   0,  ...,   0, 254, 254],
         [  0,   0,   0,  ...,   0, 254, 254],
         [  0,   0,   0,  ...,   0, 254, 254],
         ...,
         [  0,   0,   0,  ...,   0, 254, 254],
         [254, 254, 254,  ..., 254,   0,   0],
         [254, 254, 254,  ..., 254,   0,   0]]])
(100,)
torch.Size([1, 100, 100])


In [2]:
# Similar to https://gist.github.com/denis-bz/6a9d7379c8edf965b0a997c2ec2471e1

# used to store each of the functions, and allow them to all take the single arg input
from collections import OrderedDict
from functools import partial

# scipy and numpy are used for the eigensolvers
import scipy

# TODO: reinstall scikit-sparse on Mac so that it installs properly (wrong dependencies and no known fix for Mac)
# until then, just don't test it unless on a Debian system
# import sksparse 


# Starting vector intended for the iterative eigensolvers. It is not consumed
# by any solver in the cells visible here.
# TODO: use the initial vector.
# TODO: use it such that both v0 set to zeros and v0 set to random values are
#       tested, to see whether there are any differences.
# NOTE(review): zeros_like(A) produces an array shaped like the whole matrix A
#       rather than a single vector -- confirm the intended shape before
#       passing it to a solver.
v0 = np.zeros_like(A) # initialise the initial vector to all zeros

# Registry of the eigensolvers to benchmark, in the order they will be run.
# Keys are short display labels; each value is a callable that takes exactly
# one argument (A, the input matrix). Solver-specific options are bound ahead
# of time with functools.partial.
#
# NOTE: v0 will need to be set to something consistent.
# NOTE: ... and, if necessary, so will any seeds the solvers use.
# NOTE: all inputs will be positive definite, so this should be easy.
#
# Types to try:
# - shift-invert (as we are looking for the smallest)
#   (https://gist.github.com/denis-bz/2658f671cee9396ac15cfe07dcc6657d)
# - power iteration, QR, LOBPCG
# - Lanczos, Arnoldi
# - cholmod (https://scikit-sparse.readthedocs.io/en/latest/cholmod.html,
#   https://stackoverflow.com/questions/59416098/finding-smallest-eigenvectors-of-large-sparse-matrix-over-100x-slower-in-scipy)
# - any GPU-based ones? (PyTorch perhaps?)
#
# The options will include the driver for each, as well as the unique methods.
#
# The actual methods available:
#   scipy.linalg.eig
#   scipy.linalg.eigh           # should be good
#   scipy.sparse.linalg.lobpcg
#   scipy.sparse.linalg.eigs
#   scipy.sparse.linalg.eigsh
#   sksparse.cholmod.cholesky   # should be good
#   scipy.sparse.linalg.bicg
#   scipy.sparse.linalg.gmres
#
#   scipy.sparse.linalg.splu ??
#   scipy.linalg.cholesky ??
#   scipy.linalg.qr ??
#
#   numpy.linalg.cholesky
#   numpy.linalg.qr
#   numpy.linalg.eig
#   numpy.linalg.eigh           # should be good
eigs_options = OrderedDict([
    # --- NumPy (no parameters, only inputs) ---
    ("np_eig", np.linalg.eig),
    ("np_eigh", np.linalg.eigh),
    # NOTE(review): eigvals returns eigenvalues only, whereas the other
    # entries return (eigenvalues, eigenvectors) -- confirm that node.solve
    # copes with this entry.
    ("np_eigvals", np.linalg.eigvals),

    # --- SciPy variants, with some parameters bound ---
    ("sp_eig", partial(scipy.linalg.eig, check_finite=False)),  # no extra params

    # TODO: think about the problem being solved and work out which forms to
    #       pass. "g" is the generalized problem (where b is not None).
    #
    # Driver notes for eigh:
    # - subset by index is only available for evr, evx and gvx
    # - the "sy" driver prefix is for real matrices
    # - syev is symmetric QR (slow but robust)
    # - syevr is seen as optimal for most cases
    # - syevd is faster in exchange for more memory
    # - syevx could be useful for a single eigenvalue on large matrices...
    # The driver is left at its default here (syevr...); `driver=` and
    # `type=` (generalized or not) are the remaining knobs to explore.
    # subset_by_index=[0, 1] restricts the result to the eigenpairs with
    # indices 0 through 1.
    ("sp_eigh", partial(scipy.linalg.eigh,
                        check_finite=False,
                        subset_by_index=[0, 1])),

    # NOTE: eigvalsh is a one-liner shorthand for scipy.linalg.eigh with the
    # option eigvals_only=True, so it is not useful here as only the
    # eigenvectors are wanted.
    # ("np_eigvalsh", np.linalg.eigvalsh),  # _syevd
    # ("sp_eigvalsh_ev", partial( scipy.linalg.eigvalsh, driver="ev" )),  # ev evd evr evx
    # ("sp_eigvalsh_evd", partial( scipy.linalg.eigvalsh, driver="evd" )),
    # ("sp_eigvalsh_evr", partial( scipy.linalg.eigvalsh, driver="evr" )),

    #    # evecs too --
    # ("np_eigh", np.linalg.eigh),
    # ("sp_eigh_evd", partial( scipy.linalg.eigh, driver="evd" )),
    # ("sp_eigh_evr", partial( scipy.linalg.eigh, driver="evr" )),
    #     # ev evd evr evx / gv gvd gvx generalized

    #     # complex evals --
    # ("np_eigvals", np.linalg.eigvals),  # _geev
    # ("sp_eigvals", scipy.linalg.eigvals),

    # ("np_lstsq", partial( np.linalg.lstsq, b=b, rcond=rcond )),
    # ("sp_lstsq", partial( scipy.linalg.lstsq, b=b, cond=rcond )),

    # ("np_solve", partial( np.linalg.solve, b=b )),
    # ("sp_solve", partial( scipy.linalg.solve, b=b )),
    # ("np_svd", partial( np.linalg.svd, compute_uv=False )),  # gesdd
    # ("sp_svd", partial( scipy.linalg.svd, compute_uv=False )),  # lapack_driver : {'gesdd', 'gesvd'}
])

In [3]:
from time import time
import sys
sys.path.append("../")
from nc import NormalizedCuts

# Benchmark every registered eigensolver on the same matrix A through the
# NormalizedCuts node, printing the elapsed time and the objective value
# obtained for each one.
from time import perf_counter  # monotonic, high-resolution clock suited to timing

node = NormalizedCuts(eps=1e-8)#, bipart=args.bipart, symm_norm_L=args.symm_norm_L)

# The matrix passed to the objective is identical for every solver, so it is
# reshaped once instead of on every iteration.
A_batched = A.reshape(1, n * n, n * n)

for name, func in eigs_options.items():
    t0 = perf_counter()
    y, _ = node.solve(A, func=func)  # the output also includes context (not needed here)
    t = perf_counter() - t0

    # Keep only the real part of the solver output before evaluating it.
    y = torch.real(y)
    # Check against the objective function; it should be solved as close to
    # machine precision as possible.
    solution = node.objective(A_batched, y.reshape(1, n, n))
    # Fractional seconds are printed: whole-second rounding reported 0 for
    # every solver on inputs of this size.
    print(f"{name:15}: {t:9.6f} sec solution = {solution.item()}")

np_eig         :     0 sec  shape=torch.Size([1, 10, 10]) solution = 1.9968051118210863
np_eigh        :     0 sec  shape=torch.Size([1, 10, 10]) solution = 1.0
np_eigvals     :     0 sec  shape=torch.Size([1, 10, 10]) solution = 0.02197561675128176
sp_eig         :     0 sec  shape=torch.Size([1, 10, 10]) solution = 1.9968051118210863
sp_eigh        :     0 sec  shape=torch.Size([1, 10, 10]) solution = 1.0
