In [2]:
import dgl
import numpy as np
import torch
import torch.nn as nn
from scipy import sparse
import networkx as nx
from scipy.sparse import diags
import torch.functional as F
from pathlib import Path

import json
import time
from pathlib import Path
from networkx.readwrite import json_graph, read_gpickle
from networkx.linalg.laplacianmatrix import laplacian_matrix
from scipy.io import mmwrite, mmread
from scipy.sparse import csr_matrix
import sklearn
from scipy.io import mmread, mmwrite # spare save/load

# Utility Functions

In [3]:
# a = np.array([1, 0, 3])
def onehot(a, num_classes=None):
    """Return the one-hot encoding of a vector of integer class labels.

    Parameters
    ----------
    a : array-like of int
        Class index of each sample.
    num_classes : int, optional
        Number of columns of the result. When omitted it is inferred as
        ``a.max() + 1`` (0 for an empty input).

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(a.size, num_classes)`` whose row ``i`` has a 1
        in column ``a[i]`` and 0 elsewhere.
    """
    labels = np.asarray(a)
    if labels.size == 0:
        # max() of an empty array raises, so handle the empty case explicitly.
        return np.zeros((0, num_classes or 0))
    if num_classes is None:
        num_classes = int(labels.max()) + 1
    b = np.zeros((labels.size, num_classes))
    b[np.arange(labels.size), labels] = 1
    return b
# onehot(a)

def load_matrix(graph):
    """Return the adjacency matrix of ``graph`` and its inverse-degree matrix.

    Parameters
    ----------
    graph : networkx graph or scipy sparse matrix
        A NetworkX graph, or an already-built sparse adjacency matrix (which
        is used as is).

    Returns
    -------
    adj_matrix : scipy sparse matrix
        Adjacency matrix of the graph.
    degree_matrix : scipy sparse diagonal matrix
        ``D^-1``: the reciprocal of every row sum of ``adj_matrix`` on the
        diagonal, with 0 for rows whose sum is 0 (isolated nodes).
    """
    if sparse.issparse(graph):
        adj_matrix = graph
    else:
        # nx.adj_matrix was removed in NetworkX 3.0; adjacency_matrix is the
        # long-standing equivalent.
        adj_matrix = nx.adjacency_matrix(graph)
    # np.float was removed in NumPy 1.24. ravel() (rather than squeeze) keeps
    # the vector 1-D even for a single-node graph.
    degree_vec = np.asarray(adj_matrix.sum(axis=1), dtype=np.float64).ravel()
    with np.errstate(divide='ignore'):
        inv_degree = np.power(degree_vec, -1)
    # Zero-degree rows produce inf; map them to 0 so they propagate nothing.
    inv_degree[~np.isfinite(inv_degree)] = 0
    degree_matrix = diags(inv_degree, 0)
    return adj_matrix, degree_matrix

def lpa(adj_matrix,degree_matrix, labels,train_mask,iteration=10):
    """Run label propagation and report where it disagrees with the true labels.

    Parameters
    ----------
    adj_matrix : sparse matrix, shape (N, N)
        Adjacency matrix of the graph.
    degree_matrix : sparse matrix, shape (N, N)
        Matrix multiplied on the left of ``adj_matrix`` at every step
        (``load_matrix`` returns the inverse-degree matrix for this).
    labels : numpy.ndarray, shape (N, C)
        One-hot labels of all nodes.
    train_mask : numpy.ndarray
        Indices (or a boolean mask) of the nodes whose labels are visible.
    iteration : int
        Number of propagation steps.

    Returns
    -------
    influence : numpy.ndarray, shape (N, C)
        Propagated label scores.
    acc : float
        Fraction of all nodes whose arg-max score matches the true label.
    border_nodes : numpy.ndarray
        Indices of the nodes whose prediction differs from the true label.
    """
    labels = np.asarray(labels)
    # Seed only the training nodes. Zeroing rows train_mask.size..N, as was
    # done before, is only correct when the training nodes happen to be the
    # first indices; otherwise labels of unseen nodes leak into the result.
    influence = np.zeros(labels.shape, dtype=np.float64)
    influence[train_mask] = labels[train_mask]
    # The propagation operator does not change between steps; build it once.
    transition = degree_matrix @ adj_matrix
    for _ in range(iteration):
        influence = transition @ influence
        # Clamp the training nodes back to their known labels.
        influence[train_mask] = labels[train_mask]
    pred = influence.argmax(1)
    true_classes = labels.argmax(1)
    border_nodes = (pred != true_classes).nonzero()[0]
    acc = (pred == true_classes).sum() / true_classes.size
    return influence, acc, border_nodes

# influence, acc =lpa(adj_matrix,degree_matrix,onehot_labels,train_mask,iteration=20)

## Load Data

In [4]:
# Load the Cora, Citeseer and Pubmed datasets through DGL's citation_graph loaders.
from dgl.data import citation_graph as citegrh
cora = citegrh.load_cora()
citeseer = citegrh.load_citeseer()
pubmed = citegrh.load_pubmed()

  r_inv = np.power(rowsum, -1).flatten()


Finished data loading and preprocessing.
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Finished data loading and preprocessing.
  NumNodes: 19717
  NumEdges: 88651
  NumFeats: 500
  NumClasses: 3
  NumTrainingSamples: 60
  NumValidationSamples: 500
  NumTestSamples: 1000


In [511]:
# Select the dataset under study and unpack what the LPA cells need.
data=pubmed
graph=data.graph
labels=data.labels
onehot_labels=onehot(data.labels)
# Indices of the training nodes. The builtin `int` replaces `np.int`, an alias
# that was removed in NumPy 1.24.
train_mask=data.train_mask.astype(int).nonzero()[0]
train_labels = labels[train_mask]

## Run LPA

In [512]:
# Build the adjacency / inverse-degree matrices of the selected graph, then run
# 20 label-propagation steps; border_nodes are the nodes LPA mispredicts.
adj_matrix, degree_matrix=load_matrix(graph)
influence, acc, border_nodes =lpa(adj_matrix,degree_matrix,onehot_labels,train_mask,iteration=20)

## Coarsening Analysis

In [523]:
# Configuration of the coarsening analysis.
dataset='pubmed'
# Passed as the level argument to construct_proj_laplacian.
levels = 6
# Directory of the reduction results handed to construct_proj_laplacian.
reduce_results = f"graphzoom/reduction_results/{dataset}/no_fusion/"
from graphzoom.utils import construct_proj_laplacian
def check_agg(nodes, projection):
    """Return the nodes that are merged with at least one other node.

    Parameters
    ----------
    nodes : iterable of int
        Column indices of ``projection`` to check.
    projection : scipy sparse matrix, shape (n_coarse, n_fine)
        Projection whose non-zero entry ``(r, c)`` assigns fine node ``c`` to
        coarse node ``r``.

    Returns
    -------
    list
        The elements of ``nodes``, in input order, whose coarse node (the
        lowest-indexed non-zero row of their column) has more than one
        non-zero entry. Nodes whose column is empty are not assigned to any
        coarse node and are therefore left out.

    Raises
    ------
    IndexError
        If a node index is outside the column range of ``projection``.
    """
    # Work on a private CSC copy: column lookups become slices of `indices`,
    # and the per-row counts are computed once instead of slicing the sparse
    # matrix twice per node.
    proj = sparse.csc_matrix(projection, copy=True)
    proj.eliminate_zeros()  # explicitly stored zeros do not count as members
    proj.sort_indices()     # first stored row of a column == lowest row index
    n_cols = proj.shape[1]
    cluster_sizes = proj.getnnz(axis=1)
    indptr, indices = proj.indptr, proj.indices

    coarsen_nodes = []
    for node in nodes:
        col = int(node)
        if col < 0:
            col += n_cols
        if not 0 <= col < n_cols:
            raise IndexError(f"node {node} is out of range for {n_cols} columns")
        start, stop = indptr[col], indptr[col + 1]
        if start == stop:
            continue
        seed = indices[start]
        if cluster_sizes[seed] > 1:
            coarsen_nodes.append(node)
    return coarsen_nodes

In [524]:
# Read the dataset's .mtx file and let graphzoom build the per-level
# projection matrices and coarse matrices from the stored reduction results.
input_path = "graphzoom/dataset/{}/{}.mtx".format(dataset, dataset)
laplacian = mmread(input_path)
projections, coarse_adj = construct_proj_laplacian(laplacian, levels, reduce_results)

In [525]:
# Display the projection matrices returned by construct_proj_laplacian.
projections

[<7523x19717 sparse matrix of type '<class 'numpy.longlong'>'
 	with 19717 stored elements in Compressed Sparse Row format>,
 <3329x7523 sparse matrix of type '<class 'numpy.longlong'>'
 	with 7523 stored elements in Compressed Sparse Row format>,
 <1507x3329 sparse matrix of type '<class 'numpy.longlong'>'
 	with 3329 stored elements in Compressed Sparse Row format>,
 <695x1507 sparse matrix of type '<class 'numpy.longlong'>'
 	with 1507 stored elements in Compressed Sparse Row format>,
 <321x695 sparse matrix of type '<class 'numpy.longlong'>'
 	with 695 stored elements in Compressed Sparse Row format>,
 <161x321 sparse matrix of type '<class 'numpy.longlong'>'
 	with 321 stored elements in Compressed Sparse Row format>]

In [517]:
# Border nodes (LPA mispredictions) that the first projection merges with
# at least one other node.
coarse_nodes = check_agg(border_nodes,projections[0])
print(len(coarse_nodes), len(border_nodes))
# Shuffle with a fixed seed so that the subset later taken from the front of
# the list is the same on every run. `shuffle` stays imported for other cells.
from random import Random, shuffle
Random(0).shuffle(coarse_nodes)

4710 5853


In [496]:
def add_border_nodes_to_proj_matrix(projection, nodes):
    """Give each of ``nodes`` its own coarse node at every projection level.

    Parameters
    ----------
    projection : sparse matrix or list/tuple of sparse matrices
        Projection matrices ordered from the finest level. The first one must
        store exactly one entry per column (one coarse node per fine node).
    nodes : sequence of int
        Column indices of the first projection to split out.

    Returns
    -------
    list of scipy sparse matrices
        One matrix per input level. In the first (CSC) the ``k``-th node of
        ``nodes`` is re-assigned to a new row ``n_rows + k``. Every following
        level (COO) gets ``len(nodes)`` extra rows and columns that map the new
        nodes one-to-one; as before, all of its entries are written as 1.

    Raises
    ------
    ValueError
        If the first projection does not have exactly one entry per column.
    """
    if not isinstance(projection, (list, tuple)):
        projection = [projection]
    n_new = len(nodes)

    # First level: copy so that a CSC input is never modified in place.
    first = sparse.csc_matrix(projection[0], copy=True)
    print(first.shape)
    n_rows, n_cols = first.shape
    if np.any(np.diff(first.indptr) != 1):
        # indices[node] is the row of column `node` only under this layout.
        raise ValueError("first projection must store exactly one entry per column")
    row_of_column = first.indices
    for offset, node in enumerate(nodes):
        row_of_column[node] = n_rows + offset
    # Pass the shape explicitly; inferring it from the largest index would
    # drop trailing empty rows.
    projs = [sparse.csc_matrix((first.data, row_of_column, first.indptr),
                               shape=(n_rows + n_new, n_cols), dtype=np.longlong)]

    # Remaining levels: append an identity block for the new nodes.
    for level in projection[1:]:
        coo = level.tocoo()
        level_rows, level_cols = coo.shape
        # Sized by the stored entries, not by the column count.
        data = np.ones(coo.nnz + n_new, dtype=np.longlong)
        col = np.hstack((coo.col, np.arange(level_cols, level_cols + n_new)))
        row = np.hstack((coo.row, np.arange(level_rows, level_rows + n_new)))
        projs.append(sparse.coo_matrix((data, (row, col)),
                                       shape=(level_rows + n_new, level_cols + n_new)))

    return projs

In [527]:
# Number of (shuffled) coarse border nodes that get their own coarse node.
coarse_nodes_left=1000
# projections[:] passes a shallow copy of the list of projection matrices.
border_proj=add_border_nodes_to_proj_matrix(projections[:], coarse_nodes[:coarse_nodes_left])
border_proj

(7523, 19717)


[<8523x19717 sparse matrix of type '<class 'numpy.longlong'>'
 	with 19717 stored elements in Compressed Sparse Column format>,
 <4329x8523 sparse matrix of type '<class 'numpy.longlong'>'
 	with 8523 stored elements in COOrdinate format>,
 <2507x4329 sparse matrix of type '<class 'numpy.longlong'>'
 	with 4329 stored elements in COOrdinate format>,
 <1695x2507 sparse matrix of type '<class 'numpy.longlong'>'
 	with 2507 stored elements in COOrdinate format>,
 <1321x1695 sparse matrix of type '<class 'numpy.longlong'>'
 	with 1695 stored elements in COOrdinate format>,
 <1161x1321 sparse matrix of type '<class 'numpy.longlong'>'
 	with 1321 stored elements in COOrdinate format>]

In [530]:
def overwrite(path, skip_lines=2):
    """Rewrite the file at ``path`` in place without its leading lines.

    In this notebook it is applied to files that were just written with
    ``mmwrite``.

    Parameters
    ----------
    path : str
        File to modify.
    skip_lines : int, optional
        Number of lines to drop from the start of the file (default 2). A file
        with fewer lines ends up empty.

    Raises
    ------
    ValueError
        If ``skip_lines`` is negative.
    """
    if skip_lines < 0:
        raise ValueError("skip_lines must be non-negative")
    with open(path, 'r+') as f:
        remaining = f.readlines()[skip_lines:]
        # Rewind, write the kept lines, then cut off what is left of the old
        # (longer) content.
        f.seek(0)
        f.writelines(remaining)
        f.truncate()
from graphzoom.utils import mtx2matrix
# Directory that receives the modified projection files and Gs.mtx.
prefix=Path(f"graphzoom/reduction_results/{dataset}/border/")
# exist_ok keeps the cell re-runnable without a separate exists() check.
prefix.mkdir(parents=True, exist_ok=True)
for i, border_level_proj in enumerate(border_proj, start=1):
    proj_path = str(prefix.joinpath(f'Projection_{i}.mtx'))
    mmwrite(proj_path, border_level_proj)
    # Drop the first two lines of the file that was just written.
    overwrite(proj_path)
# with open(str(prefix.joinpath(f'NumLevels.txt')), 'w') as f:
#     f.write(str(len(border_proj)))

# Rebuild the coarse matrices from the files written above. `levels` replaces
# the literals 6 and 5 so this cell follows the configured number of levels.
reduce_results = f"graphzoom/reduction_results/{dataset}/border/"
border_projs, border_coarse_adj = construct_proj_laplacian(laplacian, levels, reduce_results)
gs_path = str(prefix.joinpath('Gs.mtx'))
mmwrite(gs_path, border_coarse_adj[levels - 1],symmetry='symmetric')
overwrite(gs_path)

In [531]:
# Display the matrices construct_proj_laplacian built from the border projections.
border_coarse_adj

[<19717x19717 sparse matrix of type '<class 'numpy.int64'>'
 	with 108365 stored elements in COOrdinate format>,
 <8523x8523 sparse matrix of type '<class 'numpy.longlong'>'
 	with 65587 stored elements in Compressed Sparse Row format>,
 <4329x4329 sparse matrix of type '<class 'numpy.longlong'>'
 	with 43069 stored elements in Compressed Sparse Row format>,
 <2507x2507 sparse matrix of type '<class 'numpy.longlong'>'
 	with 27871 stored elements in Compressed Sparse Row format>,
 <1695x1695 sparse matrix of type '<class 'numpy.longlong'>'
 	with 18137 stored elements in Compressed Sparse Row format>,
 <1321x1321 sparse matrix of type '<class 'numpy.longlong'>'
 	with 11953 stored elements in Compressed Sparse Row format>]

In [507]:
# Compare the first projection reloaded from the border directory with the original one.
(border_projs[0]!=projections[0])

True

In [314]:
# Read the saved Gs.mtx back with graphzoom's mtx2matrix helper.
g = mtx2matrix(str(prefix.joinpath(f'Gs.mtx')))

In [373]:
# Scratch check on an undirected nx.Graph: add the edge between nodes 1 and 2
# once in each orientation.
tg=nx.Graph()
# tg.add_nodes_from([1,2,3])
tg.add_edge(1,2)
tg.add_edge(2,1)

In [374]:
# Number of edges stored in tg after adding (1, 2) and (2, 1).
tg.number_of_edges()

1