In [1]:
import numpy as np
import scipy.sparse
from scipy.sparse.linalg import lobpcg, eigsh, minres, LinearOperator
from scipy.sparse import csr_matrix
import time
from utils.tools import build_weighted_bethe_hessian
import networkx as nx

In [2]:
# Read the sparse adjacency matrix stored at "<adj_path>.npz".
adj_path = "/Users/i.lobov/hyperwords/data/wiki/wikipedia.corpus.nodups_counts_win=1.adj"
adjacency_matrix = scipy.sparse.load_npz(adj_path + ".npz")

# Edge re-weighting. Raising to the power 0.0 turns every stored entry into
# 1.0 (float32), so the graph is effectively unweighted here.
# Alternatives that were tried and are currently disabled:
#   adjacency_matrix.data = 1 + np.log(adjacency_matrix.data)
#   adjacency_matrix.data /= np.max(adjacency_matrix.data)
adjacency_matrix.data = np.power(adjacency_matrix.data.astype(np.float32), 0.0)

# Row sums of the re-weighted matrix as a flat 1-D array.
degrees = np.asarray(adjacency_matrix.sum(axis=1)).ravel()

In [4]:
# Build the symmetric normalized Laplacian  Hr = D^{-1/2} (D - A) D^{-1/2},
# where A is `adjacency_matrix` and D is the diagonal matrix of `degrees`.
n = adjacency_matrix.shape[0]
D = scipy.sparse.spdiags(degrees, [0], n, n, format='csr')
L = D - adjacency_matrix

# D^{-1/2}. A zero-degree row would give 1/sqrt(0) = inf, which can then
# propagate inf/NaN into Hr; such entries are set to 0 instead so isolated
# nodes simply contribute empty rows/columns. The divide-by-zero warning is
# silenced because that case is handled explicitly on the next line.
with np.errstate(divide='ignore'):
    diags_sqrt = 1.0 / np.sqrt(degrees)
diags_sqrt[np.isinf(diags_sqrt)] = 0

DH = scipy.sparse.spdiags(diags_sqrt, [0], n, n, format='csr')
Hr = DH.dot(L.dot(DH))

In [8]:
# Parameter r = sqrt(<d^2> / <d> - 1), computed from the first two moments of
# the degree vector and handed to build_weighted_bethe_hessian.
# NOTE(review): presumably an estimate of the square root of the
# non-backtracking spectral radius -- confirm against utils.tools.
# Other choices that were tried:
#   r = np.sqrt(5176.292450)
#   r = np.sqrt(np.mean(degrees))
r = np.sqrt(np.square(degrees).mean() / degrees.mean() - 1)
Hr = build_weighted_bethe_hessian(adjacency_matrix, r)

# Disabled alternative (combinatorial Laplacian, optionally shifted):
#   I = scipy.sparse.eye(n, format='csr')
#   Hr = D - adjacency_matrix #+ I*np.mean(degrees)

In [17]:
# n = adjacency_matrix.shape[0]
# I = scipy.sparse.eye(n, n, dtype=np.float32, format='csr')
# Hr = Hr + I * 55

In [5]:
# preconditioner = scipy.sparse.spdiags(1.0 / bethe_diagonal, [0], n, n, format='csr')

In [6]:
# n = adjacency_matrix.shape[0]
# dim = 100
# tol = np.sqrt(1e-15)*n

# start = time.time()
# vals, vecs = eigsh(Hr, dim, which='SA', tol=tol)
# print("time elapsed: %d" % (time.time() - start))

In [5]:
class Operator(object):
    """Context object for a Python-backed PETSc matrix.

    Wraps a matrix-like object `A` (anything providing `astype`, `dot` and
    `diagonal`) and exposes the `mult` / `getDiagonal` callbacks. An instance
    is passed to `PETSc.Mat().createPython(...)` elsewhere in this notebook.
    The number of matrix-vector products performed is tracked in `n_calls`.
    """

    def __init__(self, A):
        """Store `A` cast to PETSc's scalar type and reset the call counter.

        Requires `PETSc` (from petsc4py) to be importable in the enclosing
        namespace at the time an instance is created.
        """
        # Number of times mult() has completed.
        self.n_calls = 0
        self.A = A.astype(PETSc.ScalarType)

    def mult(self, A, x, y):
        """Write the product of the wrapped matrix with `x` into `y`.

        `x` and `y` must provide `getArray(readonly=...)` returning a writable
        (for `y`) array view. The `A` argument is not used by this method.
        """
        src = x.getArray(readonly=1)
        dst = y.getArray(readonly=0)
        product = self.A.dot(src)
        # Copy into the existing buffer rather than rebinding the name.
        dst[:] = product
        self.n_calls = self.n_calls + 1

    def getDiagonal(self, A, y):
        """Write the main diagonal of the wrapped matrix into `y`.

        The `A` argument is not used by this method.
        """
        y.getArray(readonly=0)[:] = self.A.diagonal()

In [6]:
from petsc4py import PETSc
from slepc4py import SLEPc

# --- Solver settings -------------------------------------------------------
k = 100         # number of eigenpairs requested (passed as nev)
tol = 1e-4      # convergence tolerance; previously tried: np.sqrt(1e-15)*n
max_iter = 100  # iteration cap passed to setTolerances

# --- Operator --------------------------------------------------------------
# Hr is wrapped in a Python-backed PETSc matrix so that every product goes
# through Operator.mult (which also counts the calls).
n = adjacency_matrix.shape[0]
mat = Operator(Hr)
A = PETSc.Mat().createPython([n, n], mat)
A.setUp()
# Disabled alternative: hand the CSR arrays to PETSc directly.
# A = PETSc.Mat().createAIJ(
#     size=(n,n),
#     csr=(Hr.indptr, Hr.indices, Hr.data.astype(np.float32)))

# --- Eigensolver -----------------------------------------------------------
# Hermitian eigenproblem, smallest real eigenvalues first.
E = SLEPc.EPS().create()
E.setOperators(A)
E.setProblemType(SLEPc.EPS.ProblemType.HEP)
E.setWhichEigenpairs(SLEPc.EPS.Which.SMALLEST_REAL)
E.setDimensions(nev=k)
E.setTolerances(tol, max_iter)
#E.setConvergenceTest(SLEPc.EPS.Conv.ABS)

In [7]:
import time

# Reset the product counter so the figure printed below covers this solve only.
mat.n_calls = 0

start = time.time()
E.solve()
elapsed = time.time() - start

print("Time elapsed: %f" % elapsed)
print("Number of calls to Ax: %d" % mat.n_calls)

Time elapsed: 172.629435
Number of calls to Ax: 700


In [None]:
# Default dimensions and regular scaling
# Time elapsed: 285.804691
# Number of calls to Ax: 1193
# Number of iterations of the method: 16

# ncv=110
# Time elapsed: 467.946925
# Number of calls to Ax: 1989
# Number of iterations of the method: 100
# Number of converged eigenpairs: 90

# ncv=150
# Time elapsed: 336.665154
# Number of calls to Ax: 1481
# Number of iterations of the method: 34

# Default dimensions and normalized adjacency, tol=1e-4
# Time elapsed: 138.258507
# Number of calls to Ax: 600
# Number of iterations of the method: 7

In [8]:
# Print solver diagnostics, then copy every converged eigenpair out of SLEPc
# into the NumPy arrays `vals` (eigenvalues) and `vecs` (one column each).
print("")
its = E.getIterationNumber()
print("Number of iterations of the method: %i" % its)
sol_type = E.getType()
print("Solution method: %s" % sol_type)
nev, ncv, mpd = E.getDimensions()
print("Number of requested eigenvalues: %i" % nev)
tol, maxit = E.getTolerances()
print("Stopping condition: tol=%.4g, maxit=%d" % (tol, maxit))
nconv = E.getConverged()
print("Number of converged eigenpairs: %d" % nconv)

vecs = np.zeros([n, nconv])
vals = np.zeros(nconv)

# Work vectors that getEigenpair fills with the real / imaginary parts of an
# eigenvector. Only the first vector returned by getVecs() is needed.
xr = A.getVecs()[0]
xi = A.getVecs()[0]

if nconv > 0:
    print("")
    print("        k          ||Ax-kx||/||kx|| ")
    print("----------------- ------------------")
    for i in range(nconv):
        # Named `eigval` rather than `k` so that the notebook-level `k`
        # (the requested number of eigenpairs) is not overwritten here.
        eigval = E.getEigenpair(i, xr, xi)
        vals[i] = eigval.real
        # Copy the values explicitly out of the PETSc vector.
        vecs[:, i] = xr.getArray(readonly=1)
        # Only the first ten pairs are reported, with the error value
        # returned by computeError.
        if i < 10:
            error = E.computeError(i)
            if eigval.imag != 0.0:
                print(" %9f%+9f j  %12g" % (eigval.real, eigval.imag, error))
            else:
                print(" %12f       %12g" % (eigval.real, error))
    print("")


Number of iterations of the method: 6
Solution method: krylovschur
Number of requested eigenvalues: 100
Stopping condition: tol=0.0001, maxit=100
Number of converged eigenpairs: 100

        k          ||Ax-kx||/||kx|| 
----------------- ------------------
     0.000003             473279
     0.543911        6.05379e-10
     0.605690        1.00282e+10
     0.648847                 -0
     0.668177        9.09041e+09
     0.684353             2.0665
     0.694946                nan
     0.711350        4.62884e-10
     0.720759        4.56841e-10
     0.728543            1.94115



In [9]:
# Persist the first 100 eigenvectors/eigenvalues together with the degrees.
# np.save appends ".npy" to each file name.
output_path = "../data/wiki/win=1_normalized_laplacian_slepc_pow=0.0_dim=100"
for suffix, payload in ((".vecs", vecs[:, :100]),
                        (".vals", vals[:100]),
                        (".degrees", degrees)):
    np.save(output_path + suffix, payload)

In [10]:
import shutil

# Reuse the vocabulary file of an earlier run under the name that matches the
# normalized-Laplacian output written above.
base_path = '../data/wiki/'
vocab_src = base_path + 'win=1_weighted_bethe_hessian_slepc_scaled_abs_tol=1e-3_pow=0.3_dim=500.words.vocab'
vocab_dst = base_path + 'win=1_normalized_laplacian_slepc_pow=0.0_dim=100' + ".words.vocab"
shutil.copyfile(vocab_src, vocab_dst)

'../data/wiki/win=1_normalized_laplacian_slepc_pow=0.0_dim=100.words.vocab'

In [14]:
# NOTE(review): `vecs2` and `vals2` are only assigned by the np.load cell that
# follows this one in the notebook, so on a fresh top-to-bottom run this cell
# raises NameError. Confirm the intended cell order.
output_path = "../data/wiki/win=1_weighted_bethe_hessian_scaled3_pow=0.30_dim=100"
np.save(output_path + ".vecs", vecs2)
# The eigenvalues are stored shifted down by the mean degree.
np.save(output_path + ".vals", np.subtract(vals2, degrees.mean()))
np.save(output_path + ".degrees", degrees)

In [11]:
# Load eigenvectors / eigenvalues saved by an earlier run.
output_path = "../data/wiki/win=1_bethe_hessian_small_rhoB_est_pow=0.00_dim=100"
vecs2, vals2 = (np.load(output_path + ext) for ext in (".vecs.npy", ".vals.npy"))

In [14]:
# Pool both result sets: eigenvalues end to end, eigenvectors side by side
# (columns of `vecs` first, then columns of `vecs2`).
all_vals = np.concatenate((vals, vals2))
all_vecs = np.hstack((vecs, vecs2))

In [19]:
# argsort is ascending, so despite its name `top_vals_inds` holds the indices
# of the 100 smallest pooled eigenvalues.
top_vals_inds = all_vals.argsort()[:100]

# np.save appends ".npy" to each file name.
output_path = "../data/wiki/win=1_bethe_hessian_combo_rhoB_est_pow=0.00_dim=100"
for suffix, payload in ((".vecs", all_vecs[:, top_vals_inds]),
                        (".vals", all_vals[top_vals_inds]),
                        (".degrees", degrees)):
    np.save(output_path + suffix, payload)